ztest.c revision 258717
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23254112Sdelphij * Copyright (c) 2013 by Delphix. All rights reserved. 24228103Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 25236143Smm * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 26251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved. 27168404Spjd */ 28168404Spjd 29168404Spjd/* 30168404Spjd * The objective of this program is to provide a DMU/ZAP/SPA stress test 31168404Spjd * that runs entirely in userland, is easy to use, and easy to extend. 32168404Spjd * 33168404Spjd * The overall design of the ztest program is as follows: 34168404Spjd * 35168404Spjd * (1) For each major functional area (e.g. adding vdevs to a pool, 36168404Spjd * creating and destroying datasets, reading and writing objects, etc) 37168404Spjd * we have a simple routine to test that functionality. These 38168404Spjd * individual routines do not have to do anything "stressful". 39168404Spjd * 40168404Spjd * (2) We turn these simple functionality tests into a stress test by 41168404Spjd * running them all in parallel, with as many threads as desired, 42168404Spjd * and spread across as many datasets, objects, and vdevs as desired. 43168404Spjd * 44168404Spjd * (3) While all this is happening, we inject faults into the pool to 45168404Spjd * verify that self-healing data really works. 46168404Spjd * 47168404Spjd * (4) Every time we open a dataset, we change its checksum and compression 48168404Spjd * functions. Thus even individual objects vary from block to block 49168404Spjd * in which checksum they use and whether they're compressed. 50168404Spjd * 51168404Spjd * (5) To verify that we never lose on-disk consistency after a crash, 52168404Spjd * we run the entire test in a child of the main process. 53168404Spjd * At random times, the child self-immolates with a SIGKILL. 54168404Spjd * This is the software equivalent of pulling the power cord. 55168404Spjd * The parent then runs the test again, using the existing 56236143Smm * storage pool, as many times as desired. If backwards compatability 57236143Smm * testing is enabled ztest will sometimes run the "older" version 58236143Smm * of ztest after a SIGKILL. 59168404Spjd * 60168404Spjd * (6) To verify that we don't have future leaks or temporal incursions, 61168404Spjd * many of the functional tests record the transaction group number 62168404Spjd * as part of their data. When reading old data, they verify that 63168404Spjd * the transaction group number is less than the current, open txg. 64168404Spjd * If you add a new test, please do this if applicable. 65168404Spjd * 66168404Spjd * When run with no arguments, ztest runs for about five minutes and 67168404Spjd * produces no output if successful. To get a little bit of information, 68168404Spjd * specify -V. To get more information, specify -VV, and so on. 69168404Spjd * 70168404Spjd * To turn this into an overnight stress test, use -T to specify run time. 71168404Spjd * 72168404Spjd * You can ask more more vdevs [-v], datasets [-d], or threads [-t] 73168404Spjd * to increase the pool capacity, fanout, and overall stress level. 74168404Spjd * 75236143Smm * Use the -k option to set the desired frequency of kills. 76236143Smm * 77236143Smm * When ztest invokes itself it passes all relevant information through a 78236143Smm * temporary file which is mmap-ed in the child process. This allows shared 79236143Smm * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always 80236143Smm * stored at offset 0 of this file and contains information on the size and 81236143Smm * number of shared structures in the file. The information stored in this file 82236143Smm * must remain backwards compatible with older versions of ztest so that 83236143Smm * ztest can invoke them during backwards compatibility testing (-B). 84168404Spjd */ 85168404Spjd 86168404Spjd#include <sys/zfs_context.h> 87168404Spjd#include <sys/spa.h> 88168404Spjd#include <sys/dmu.h> 89168404Spjd#include <sys/txg.h> 90209962Smm#include <sys/dbuf.h> 91168404Spjd#include <sys/zap.h> 92168404Spjd#include <sys/dmu_objset.h> 93168404Spjd#include <sys/poll.h> 94168404Spjd#include <sys/stat.h> 95168404Spjd#include <sys/time.h> 96168404Spjd#include <sys/wait.h> 97168404Spjd#include <sys/mman.h> 98168404Spjd#include <sys/resource.h> 99168404Spjd#include <sys/zio.h> 100168404Spjd#include <sys/zil.h> 101219089Spjd#include <sys/zil_impl.h> 102168404Spjd#include <sys/vdev_impl.h> 103185029Spjd#include <sys/vdev_file.h> 104168404Spjd#include <sys/spa_impl.h> 105219089Spjd#include <sys/metaslab_impl.h> 106168404Spjd#include <sys/dsl_prop.h> 107207910Smm#include <sys/dsl_dataset.h> 108248571Smm#include <sys/dsl_destroy.h> 109219089Spjd#include <sys/dsl_scan.h> 110219089Spjd#include <sys/zio_checksum.h> 111168404Spjd#include <sys/refcount.h> 112236884Smm#include <sys/zfeature.h> 113248571Smm#include <sys/dsl_userhold.h> 114168404Spjd#include <stdio.h> 115168404Spjd#include <stdio_ext.h> 116168404Spjd#include <stdlib.h> 117168404Spjd#include <unistd.h> 118168404Spjd#include <signal.h> 119168404Spjd#include <umem.h> 120168404Spjd#include <dlfcn.h> 121168404Spjd#include <ctype.h> 122168404Spjd#include <math.h> 123168404Spjd#include <errno.h> 124168404Spjd#include <sys/fs/zfs.h> 125219089Spjd#include <libnvpair.h> 126168404Spjd 127242845Sdelphijstatic int ztest_fd_data = -1; 128242845Sdelphijstatic int ztest_fd_rand = -1; 129168404Spjd 130236143Smmtypedef struct ztest_shared_hdr { 131236143Smm uint64_t zh_hdr_size; 132236143Smm uint64_t zh_opts_size; 133236143Smm uint64_t zh_size; 134236143Smm uint64_t zh_stats_size; 135236143Smm uint64_t zh_stats_count; 136236143Smm uint64_t zh_ds_size; 137236143Smm uint64_t zh_ds_count; 138236143Smm} ztest_shared_hdr_t; 139168404Spjd 140236143Smmstatic ztest_shared_hdr_t *ztest_shared_hdr; 141236143Smm 142236143Smmtypedef struct ztest_shared_opts { 143236143Smm char zo_pool[MAXNAMELEN]; 144236143Smm char zo_dir[MAXNAMELEN]; 145236143Smm char zo_alt_ztest[MAXNAMELEN]; 146236143Smm char zo_alt_libpath[MAXNAMELEN]; 147236143Smm uint64_t zo_vdevs; 148236143Smm uint64_t zo_vdevtime; 149236143Smm size_t zo_vdev_size; 150236143Smm int zo_ashift; 151236143Smm int zo_mirrors; 152236143Smm int zo_raidz; 153236143Smm int zo_raidz_parity; 154236143Smm int zo_datasets; 155236143Smm int zo_threads; 156236143Smm uint64_t zo_passtime; 157236143Smm uint64_t zo_killrate; 158236143Smm int zo_verbose; 159236143Smm int zo_init; 160236143Smm uint64_t zo_time; 161236143Smm uint64_t zo_maxloops; 162236143Smm uint64_t zo_metaslab_gang_bang; 163236143Smm} ztest_shared_opts_t; 164236143Smm 165236143Smmstatic const ztest_shared_opts_t ztest_opts_defaults = { 166236143Smm .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, 167236143Smm .zo_dir = { '/', 't', 'm', 'p', '\0' }, 168236143Smm .zo_alt_ztest = { '\0' }, 169236143Smm .zo_alt_libpath = { '\0' }, 170236143Smm .zo_vdevs = 5, 171236143Smm .zo_ashift = SPA_MINBLOCKSHIFT, 172236143Smm .zo_mirrors = 2, 173236143Smm .zo_raidz = 4, 174236143Smm .zo_raidz_parity = 1, 175236143Smm .zo_vdev_size = SPA_MINDEVSIZE, 176236143Smm .zo_datasets = 7, 177236143Smm .zo_threads = 23, 178236143Smm .zo_passtime = 60, /* 60 seconds */ 179236143Smm .zo_killrate = 70, /* 70% kill rate */ 180236143Smm .zo_verbose = 0, 181236143Smm .zo_init = 1, 182236143Smm .zo_time = 300, /* 5 minutes */ 183236143Smm .zo_maxloops = 50, /* max loops during spa_freeze() */ 184236143Smm .zo_metaslab_gang_bang = 32 << 10 185236143Smm}; 186236143Smm 187236143Smmextern uint64_t metaslab_gang_bang; 188236143Smmextern uint64_t metaslab_df_alloc_threshold; 189258632Savgextern uint64_t zfs_deadman_synctime_ms; 190236143Smm 191236143Smmstatic ztest_shared_opts_t *ztest_shared_opts; 192236143Smmstatic ztest_shared_opts_t ztest_opts; 193236143Smm 194236143Smmtypedef struct ztest_shared_ds { 195236143Smm uint64_t zd_seq; 196236143Smm} ztest_shared_ds_t; 197236143Smm 198236143Smmstatic ztest_shared_ds_t *ztest_shared_ds; 199236143Smm#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) 200236143Smm 201219089Spjd#define BT_MAGIC 0x123456789abcdefULL 202236143Smm#define MAXFAULTS() \ 203236143Smm (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) 204219089Spjd 205219089Spjdenum ztest_io_type { 206219089Spjd ZTEST_IO_WRITE_TAG, 207219089Spjd ZTEST_IO_WRITE_PATTERN, 208219089Spjd ZTEST_IO_WRITE_ZEROES, 209219089Spjd ZTEST_IO_TRUNCATE, 210219089Spjd ZTEST_IO_SETATTR, 211243524Smm ZTEST_IO_REWRITE, 212219089Spjd ZTEST_IO_TYPES 213219089Spjd}; 214219089Spjd 215185029Spjdtypedef struct ztest_block_tag { 216219089Spjd uint64_t bt_magic; 217185029Spjd uint64_t bt_objset; 218185029Spjd uint64_t bt_object; 219185029Spjd uint64_t bt_offset; 220219089Spjd uint64_t bt_gen; 221185029Spjd uint64_t bt_txg; 222219089Spjd uint64_t bt_crtxg; 223185029Spjd} ztest_block_tag_t; 224185029Spjd 225219089Spjdtypedef struct bufwad { 226219089Spjd uint64_t bw_index; 227219089Spjd uint64_t bw_txg; 228219089Spjd uint64_t bw_data; 229219089Spjd} bufwad_t; 230168404Spjd 231219089Spjd/* 232219089Spjd * XXX -- fix zfs range locks to be generic so we can use them here. 233219089Spjd */ 234219089Spjdtypedef enum { 235219089Spjd RL_READER, 236219089Spjd RL_WRITER, 237219089Spjd RL_APPEND 238219089Spjd} rl_type_t; 239168404Spjd 240219089Spjdtypedef struct rll { 241219089Spjd void *rll_writer; 242219089Spjd int rll_readers; 243219089Spjd mutex_t rll_lock; 244219089Spjd cond_t rll_cv; 245219089Spjd} rll_t; 246219089Spjd 247219089Spjdtypedef struct rl { 248219089Spjd uint64_t rl_object; 249219089Spjd uint64_t rl_offset; 250219089Spjd uint64_t rl_size; 251219089Spjd rll_t *rl_lock; 252219089Spjd} rl_t; 253219089Spjd 254219089Spjd#define ZTEST_RANGE_LOCKS 64 255219089Spjd#define ZTEST_OBJECT_LOCKS 64 256219089Spjd 257168404Spjd/* 258219089Spjd * Object descriptor. Used as a template for object lookup/create/remove. 259219089Spjd */ 260219089Spjdtypedef struct ztest_od { 261219089Spjd uint64_t od_dir; 262219089Spjd uint64_t od_object; 263219089Spjd dmu_object_type_t od_type; 264219089Spjd dmu_object_type_t od_crtype; 265219089Spjd uint64_t od_blocksize; 266219089Spjd uint64_t od_crblocksize; 267219089Spjd uint64_t od_gen; 268219089Spjd uint64_t od_crgen; 269219089Spjd char od_name[MAXNAMELEN]; 270219089Spjd} ztest_od_t; 271219089Spjd 272219089Spjd/* 273219089Spjd * Per-dataset state. 274219089Spjd */ 275219089Spjdtypedef struct ztest_ds { 276236143Smm ztest_shared_ds_t *zd_shared; 277219089Spjd objset_t *zd_os; 278224526Smm rwlock_t zd_zilog_lock; 279219089Spjd zilog_t *zd_zilog; 280219089Spjd ztest_od_t *zd_od; /* debugging aid */ 281219089Spjd char zd_name[MAXNAMELEN]; 282219089Spjd mutex_t zd_dirobj_lock; 283219089Spjd rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; 284219089Spjd rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; 285219089Spjd} ztest_ds_t; 286219089Spjd 287219089Spjd/* 288219089Spjd * Per-iteration state. 289219089Spjd */ 290219089Spjdtypedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); 291219089Spjd 292219089Spjdtypedef struct ztest_info { 293219089Spjd ztest_func_t *zi_func; /* test function */ 294219089Spjd uint64_t zi_iters; /* iterations per execution */ 295219089Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 296219089Spjd} ztest_info_t; 297219089Spjd 298236143Smmtypedef struct ztest_shared_callstate { 299236143Smm uint64_t zc_count; /* per-pass count */ 300236143Smm uint64_t zc_time; /* per-pass time */ 301236143Smm uint64_t zc_next; /* next time to call this function */ 302236143Smm} ztest_shared_callstate_t; 303236143Smm 304236143Smmstatic ztest_shared_callstate_t *ztest_shared_callstate; 305236143Smm#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) 306236143Smm 307219089Spjd/* 308168404Spjd * Note: these aren't static because we want dladdr() to work. 309168404Spjd */ 310168404Spjdztest_func_t ztest_dmu_read_write; 311168404Spjdztest_func_t ztest_dmu_write_parallel; 312168404Spjdztest_func_t ztest_dmu_object_alloc_free; 313219089Spjdztest_func_t ztest_dmu_commit_callbacks; 314168404Spjdztest_func_t ztest_zap; 315168404Spjdztest_func_t ztest_zap_parallel; 316219089Spjdztest_func_t ztest_zil_commit; 317224526Smmztest_func_t ztest_zil_remount; 318219089Spjdztest_func_t ztest_dmu_read_write_zcopy; 319168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 320219089Spjdztest_func_t ztest_dmu_prealloc; 321219089Spjdztest_func_t ztest_fzap; 322168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 323219089Spjdztest_func_t ztest_dsl_prop_get_set; 324219089Spjdztest_func_t ztest_spa_prop_get_set; 325168404Spjdztest_func_t ztest_spa_create_destroy; 326168404Spjdztest_func_t ztest_fault_inject; 327219089Spjdztest_func_t ztest_ddt_repair; 328219089Spjdztest_func_t ztest_dmu_snapshot_hold; 329185029Spjdztest_func_t ztest_spa_rename; 330219089Spjdztest_func_t ztest_scrub; 331219089Spjdztest_func_t ztest_dsl_dataset_promote_busy; 332168404Spjdztest_func_t ztest_vdev_attach_detach; 333168404Spjdztest_func_t ztest_vdev_LUN_growth; 334168404Spjdztest_func_t ztest_vdev_add_remove; 335185029Spjdztest_func_t ztest_vdev_aux_add_remove; 336219089Spjdztest_func_t ztest_split_pool; 337228103Smmztest_func_t ztest_reguid; 338243505Smmztest_func_t ztest_spa_upgrade; 339168404Spjd 340219089Spjduint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ 341219089Spjduint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ 342219089Spjduint64_t zopt_often = 1ULL * NANOSEC; /* every second */ 343219089Spjduint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ 344219089Spjduint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ 345168404Spjd 346168404Spjdztest_info_t ztest_info[] = { 347185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 348219089Spjd { ztest_dmu_write_parallel, 10, &zopt_always }, 349185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 350219089Spjd { ztest_dmu_commit_callbacks, 1, &zopt_always }, 351185029Spjd { ztest_zap, 30, &zopt_always }, 352185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 353219089Spjd { ztest_split_pool, 1, &zopt_always }, 354219089Spjd { ztest_zil_commit, 1, &zopt_incessant }, 355224526Smm { ztest_zil_remount, 1, &zopt_sometimes }, 356219089Spjd { ztest_dmu_read_write_zcopy, 1, &zopt_often }, 357219089Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_often }, 358219089Spjd { ztest_dsl_prop_get_set, 1, &zopt_often }, 359219089Spjd { ztest_spa_prop_get_set, 1, &zopt_sometimes }, 360219089Spjd#if 0 361219089Spjd { ztest_dmu_prealloc, 1, &zopt_sometimes }, 362219089Spjd#endif 363219089Spjd { ztest_fzap, 1, &zopt_sometimes }, 364219089Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 365219089Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 366185029Spjd { ztest_fault_inject, 1, &zopt_sometimes }, 367219089Spjd { ztest_ddt_repair, 1, &zopt_sometimes }, 368219089Spjd { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 369254074Sdelphij { ztest_reguid, 1, &zopt_rarely }, 370185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 371219089Spjd { ztest_scrub, 1, &zopt_rarely }, 372243505Smm { ztest_spa_upgrade, 1, &zopt_rarely }, 373219089Spjd { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, 374248571Smm { ztest_vdev_attach_detach, 1, &zopt_sometimes }, 375185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 376236143Smm { ztest_vdev_add_remove, 1, 377236143Smm &ztest_opts.zo_vdevtime }, 378236143Smm { ztest_vdev_aux_add_remove, 1, 379236143Smm &ztest_opts.zo_vdevtime }, 380168404Spjd}; 381168404Spjd 382168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 383168404Spjd 384219089Spjd/* 385219089Spjd * The following struct is used to hold a list of uncalled commit callbacks. 386219089Spjd * The callbacks are ordered by txg number. 387219089Spjd */ 388219089Spjdtypedef struct ztest_cb_list { 389219089Spjd mutex_t zcl_callbacks_lock; 390219089Spjd list_t zcl_callbacks; 391219089Spjd} ztest_cb_list_t; 392168404Spjd 393168404Spjd/* 394168404Spjd * Stuff we need to share writably between parent and child. 395168404Spjd */ 396168404Spjdtypedef struct ztest_shared { 397236143Smm boolean_t zs_do_init; 398219089Spjd hrtime_t zs_proc_start; 399219089Spjd hrtime_t zs_proc_stop; 400219089Spjd hrtime_t zs_thread_start; 401219089Spjd hrtime_t zs_thread_stop; 402219089Spjd hrtime_t zs_thread_kill; 403219089Spjd uint64_t zs_enospc_count; 404219089Spjd uint64_t zs_vdev_next_leaf; 405185029Spjd uint64_t zs_vdev_aux; 406168404Spjd uint64_t zs_alloc; 407168404Spjd uint64_t zs_space; 408219089Spjd uint64_t zs_splits; 409219089Spjd uint64_t zs_mirrors; 410236143Smm uint64_t zs_metaslab_sz; 411236143Smm uint64_t zs_metaslab_df_alloc_threshold; 412236143Smm uint64_t zs_guid; 413168404Spjd} ztest_shared_t; 414168404Spjd 415219089Spjd#define ID_PARALLEL -1ULL 416219089Spjd 417168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 418185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 419219089Spjdztest_shared_t *ztest_shared; 420168404Spjd 421236143Smmstatic spa_t *ztest_spa = NULL; 422236143Smmstatic ztest_ds_t *ztest_ds; 423168404Spjd 424236143Smmstatic mutex_t ztest_vdev_lock; 425239620Smm 426239620Smm/* 427239620Smm * The ztest_name_lock protects the pool and dataset namespace used by 428239620Smm * the individual tests. To modify the namespace, consumers must grab 429239620Smm * this lock as writer. Grabbing the lock as reader will ensure that the 430239620Smm * namespace does not change while the lock is held. 431239620Smm */ 432236143Smmstatic rwlock_t ztest_name_lock; 433236143Smm 434236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 435185029Spjdstatic boolean_t ztest_exiting; 436168404Spjd 437219089Spjd/* Global commit callback list */ 438219089Spjdstatic ztest_cb_list_t zcl; 439219089Spjd 440219089Spjdenum ztest_object { 441219089Spjd ZTEST_META_DNODE = 0, 442219089Spjd ZTEST_DIROBJ, 443219089Spjd ZTEST_OBJECTS 444219089Spjd}; 445168404Spjd 446168676Spjdstatic void usage(boolean_t) __NORETURN; 447168498Spjd 448168404Spjd/* 449168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 450168404Spjd * debugging facilities. 451168404Spjd */ 452168404Spjdconst char * 453168404Spjd_umem_debug_init() 454168404Spjd{ 455168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 456168404Spjd} 457168404Spjd 458168404Spjdconst char * 459168404Spjd_umem_logging_init(void) 460168404Spjd{ 461168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 462168404Spjd} 463168404Spjd 464168404Spjd#define FATAL_MSG_SZ 1024 465168404Spjd 466168404Spjdchar *fatal_msg; 467168404Spjd 468168404Spjdstatic void 469168404Spjdfatal(int do_perror, char *message, ...) 470168404Spjd{ 471168404Spjd va_list args; 472168404Spjd int save_errno = errno; 473168404Spjd char buf[FATAL_MSG_SZ]; 474168404Spjd 475168404Spjd (void) fflush(stdout); 476168404Spjd 477168404Spjd va_start(args, message); 478168404Spjd (void) sprintf(buf, "ztest: "); 479168404Spjd /* LINTED */ 480168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 481168404Spjd va_end(args); 482168404Spjd if (do_perror) { 483168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 484168404Spjd ": %s", strerror(save_errno)); 485168404Spjd } 486168404Spjd (void) fprintf(stderr, "%s\n", buf); 487168404Spjd fatal_msg = buf; /* to ease debugging */ 488168404Spjd if (ztest_dump_core) 489168404Spjd abort(); 490168404Spjd exit(3); 491168404Spjd} 492168404Spjd 493168404Spjdstatic int 494168404Spjdstr2shift(const char *buf) 495168404Spjd{ 496168404Spjd const char *ends = "BKMGTPEZ"; 497168404Spjd int i; 498168404Spjd 499168404Spjd if (buf[0] == '\0') 500168404Spjd return (0); 501168404Spjd for (i = 0; i < strlen(ends); i++) { 502168404Spjd if (toupper(buf[0]) == ends[i]) 503168404Spjd break; 504168404Spjd } 505168498Spjd if (i == strlen(ends)) { 506168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 507168498Spjd buf); 508168498Spjd usage(B_FALSE); 509168498Spjd } 510168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 511168404Spjd return (10*i); 512168404Spjd } 513168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 514168498Spjd usage(B_FALSE); 515168498Spjd /* NOTREACHED */ 516168404Spjd} 517168404Spjd 518168404Spjdstatic uint64_t 519168404Spjdnicenumtoull(const char *buf) 520168404Spjd{ 521168404Spjd char *end; 522168404Spjd uint64_t val; 523168404Spjd 524168404Spjd val = strtoull(buf, &end, 0); 525168404Spjd if (end == buf) { 526168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 527168498Spjd usage(B_FALSE); 528168404Spjd } else if (end[0] == '.') { 529168404Spjd double fval = strtod(buf, &end); 530168404Spjd fval *= pow(2, str2shift(end)); 531168498Spjd if (fval > UINT64_MAX) { 532168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 533168498Spjd buf); 534168498Spjd usage(B_FALSE); 535168498Spjd } 536168404Spjd val = (uint64_t)fval; 537168404Spjd } else { 538168404Spjd int shift = str2shift(end); 539168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 540168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 541168498Spjd buf); 542168498Spjd usage(B_FALSE); 543168498Spjd } 544168404Spjd val <<= shift; 545168404Spjd } 546168404Spjd return (val); 547168404Spjd} 548168404Spjd 549168404Spjdstatic void 550168498Spjdusage(boolean_t requested) 551168404Spjd{ 552236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 553236143Smm 554168404Spjd char nice_vdev_size[10]; 555168404Spjd char nice_gang_bang[10]; 556168498Spjd FILE *fp = requested ? stdout : stderr; 557168404Spjd 558236143Smm nicenum(zo->zo_vdev_size, nice_vdev_size); 559236143Smm nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); 560168404Spjd 561168498Spjd (void) fprintf(fp, "Usage: %s\n" 562168404Spjd "\t[-v vdevs (default: %llu)]\n" 563168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 564219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 565168404Spjd "\t[-m mirror_copies (default: %d)]\n" 566168404Spjd "\t[-r raidz_disks (default: %d)]\n" 567168404Spjd "\t[-R raidz_parity (default: %d)]\n" 568168404Spjd "\t[-d datasets (default: %d)]\n" 569168404Spjd "\t[-t threads (default: %d)]\n" 570168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 571219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 572219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 573168404Spjd "\t[-p pool_name (default: %s)]\n" 574219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 575219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 576219089Spjd "\t[-E] use existing pool instead of creating new one\n" 577219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 578219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 579219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 580236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 581168498Spjd "\t[-h] (print help)\n" 582168404Spjd "", 583236143Smm zo->zo_pool, 584236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 585185029Spjd nice_vdev_size, /* -s */ 586236143Smm zo->zo_ashift, /* -a */ 587236143Smm zo->zo_mirrors, /* -m */ 588236143Smm zo->zo_raidz, /* -r */ 589236143Smm zo->zo_raidz_parity, /* -R */ 590236143Smm zo->zo_datasets, /* -d */ 591236143Smm zo->zo_threads, /* -t */ 592185029Spjd nice_gang_bang, /* -g */ 593236143Smm zo->zo_init, /* -i */ 594236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 595236143Smm zo->zo_pool, /* -p */ 596236143Smm zo->zo_dir, /* -f */ 597236143Smm (u_longlong_t)zo->zo_time, /* -T */ 598236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 599236143Smm (u_longlong_t)zo->zo_passtime); 600168498Spjd exit(requested ? 0 : 1); 601168404Spjd} 602168404Spjd 603168404Spjdstatic void 604168404Spjdprocess_options(int argc, char **argv) 605168404Spjd{ 606236143Smm char *path; 607236143Smm ztest_shared_opts_t *zo = &ztest_opts; 608236143Smm 609168404Spjd int opt; 610168404Spjd uint64_t value; 611236143Smm char altdir[MAXNAMELEN] = { 0 }; 612168404Spjd 613236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 614168404Spjd 615168404Spjd while ((opt = getopt(argc, argv, 616236143Smm "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { 617168404Spjd value = 0; 618168404Spjd switch (opt) { 619185029Spjd case 'v': 620185029Spjd case 's': 621185029Spjd case 'a': 622185029Spjd case 'm': 623185029Spjd case 'r': 624185029Spjd case 'R': 625185029Spjd case 'd': 626185029Spjd case 't': 627185029Spjd case 'g': 628185029Spjd case 'i': 629185029Spjd case 'k': 630185029Spjd case 'T': 631185029Spjd case 'P': 632219089Spjd case 'F': 633168404Spjd value = nicenumtoull(optarg); 634168404Spjd } 635168404Spjd switch (opt) { 636185029Spjd case 'v': 637236143Smm zo->zo_vdevs = value; 638168404Spjd break; 639185029Spjd case 's': 640236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 641168404Spjd break; 642185029Spjd case 'a': 643236143Smm zo->zo_ashift = value; 644168404Spjd break; 645185029Spjd case 'm': 646236143Smm zo->zo_mirrors = value; 647168404Spjd break; 648185029Spjd case 'r': 649236143Smm zo->zo_raidz = MAX(1, value); 650168404Spjd break; 651185029Spjd case 'R': 652236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 653168404Spjd break; 654185029Spjd case 'd': 655236143Smm zo->zo_datasets = MAX(1, value); 656168404Spjd break; 657185029Spjd case 't': 658236143Smm zo->zo_threads = MAX(1, value); 659168404Spjd break; 660185029Spjd case 'g': 661236143Smm zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, 662236143Smm value); 663168404Spjd break; 664185029Spjd case 'i': 665236143Smm zo->zo_init = value; 666168404Spjd break; 667185029Spjd case 'k': 668236143Smm zo->zo_killrate = value; 669168404Spjd break; 670185029Spjd case 'p': 671236143Smm (void) strlcpy(zo->zo_pool, optarg, 672236143Smm sizeof (zo->zo_pool)); 673168404Spjd break; 674185029Spjd case 'f': 675236143Smm path = realpath(optarg, NULL); 676236143Smm if (path == NULL) { 677236143Smm (void) fprintf(stderr, "error: %s: %s\n", 678236143Smm optarg, strerror(errno)); 679236143Smm usage(B_FALSE); 680236143Smm } else { 681236143Smm (void) strlcpy(zo->zo_dir, path, 682236143Smm sizeof (zo->zo_dir)); 683236143Smm } 684168404Spjd break; 685185029Spjd case 'V': 686236143Smm zo->zo_verbose++; 687168404Spjd break; 688185029Spjd case 'E': 689236143Smm zo->zo_init = 0; 690168404Spjd break; 691185029Spjd case 'T': 692236143Smm zo->zo_time = value; 693168404Spjd break; 694185029Spjd case 'P': 695236143Smm zo->zo_passtime = MAX(1, value); 696168404Spjd break; 697219089Spjd case 'F': 698236143Smm zo->zo_maxloops = MAX(1, value); 699219089Spjd break; 700236143Smm case 'B': 701236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 702236143Smm break; 703185029Spjd case 'h': 704168498Spjd usage(B_TRUE); 705168498Spjd break; 706185029Spjd case '?': 707185029Spjd default: 708168498Spjd usage(B_FALSE); 709168404Spjd break; 710168404Spjd } 711168404Spjd } 712168404Spjd 713236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 714168404Spjd 715236143Smm zo->zo_vdevtime = 716236143Smm (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs : 717219089Spjd UINT64_MAX >> 2); 718236143Smm 719236143Smm if (strlen(altdir) > 0) { 720242845Sdelphij char *cmd; 721242845Sdelphij char *realaltdir; 722236143Smm char *bin; 723236143Smm char *ztest; 724236143Smm char *isa; 725236143Smm int isalen; 726236143Smm 727242845Sdelphij cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 728242845Sdelphij realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 729242845Sdelphij 730242845Sdelphij VERIFY(NULL != realpath(getexecname(), cmd)); 731236143Smm if (0 != access(altdir, F_OK)) { 732236143Smm ztest_dump_core = B_FALSE; 733236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 734236143Smm altdir); 735236143Smm } 736236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 737236143Smm 738236143Smm /* 739236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 740236143Smm * We want to extract <isa> to determine if we should use 741236143Smm * 32 or 64 bit binaries. 742236143Smm */ 743236143Smm bin = strstr(cmd, "/usr/bin/"); 744236143Smm ztest = strstr(bin, "/ztest"); 745236143Smm isa = bin + 9; 746236143Smm isalen = ztest - isa; 747236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 748236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 749236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 750236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 751236143Smm 752236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 753236143Smm ztest_dump_core = B_FALSE; 754236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 755236143Smm zo->zo_alt_ztest); 756236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 757236143Smm ztest_dump_core = B_FALSE; 758236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 759236143Smm zo->zo_alt_libpath); 760236143Smm } 761242845Sdelphij 762242845Sdelphij umem_free(cmd, MAXPATHLEN); 763242845Sdelphij umem_free(realaltdir, MAXPATHLEN); 764236143Smm } 765168404Spjd} 766168404Spjd 767219089Spjdstatic void 768219089Spjdztest_kill(ztest_shared_t *zs) 769219089Spjd{ 770236143Smm zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); 771236143Smm zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); 772254112Sdelphij 773254112Sdelphij /* 774254112Sdelphij * Before we kill off ztest, make sure that the config is updated. 775254112Sdelphij * See comment above spa_config_sync(). 776254112Sdelphij */ 777254112Sdelphij mutex_enter(&spa_namespace_lock); 778254112Sdelphij spa_config_sync(ztest_spa, B_FALSE, B_FALSE); 779254112Sdelphij mutex_exit(&spa_namespace_lock); 780254112Sdelphij 781254112Sdelphij zfs_dbgmsg_print(FTAG); 782219089Spjd (void) kill(getpid(), SIGKILL); 783219089Spjd} 784219089Spjd 785168404Spjdstatic uint64_t 786219089Spjdztest_random(uint64_t range) 787219089Spjd{ 788219089Spjd uint64_t r; 789219089Spjd 790242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 791242845Sdelphij 792219089Spjd if (range == 0) 793219089Spjd return (0); 794219089Spjd 795242845Sdelphij if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) 796219089Spjd fatal(1, "short read from /dev/urandom"); 797219089Spjd 798219089Spjd return (r % range); 799219089Spjd} 800219089Spjd 801219089Spjd/* ARGSUSED */ 802219089Spjdstatic void 803219089Spjdztest_record_enospc(const char *s) 804219089Spjd{ 805219089Spjd ztest_shared->zs_enospc_count++; 806219089Spjd} 807219089Spjd 808219089Spjdstatic uint64_t 809168404Spjdztest_get_ashift(void) 810168404Spjd{ 811236143Smm if (ztest_opts.zo_ashift == 0) 812168404Spjd return (SPA_MINBLOCKSHIFT + ztest_random(3)); 813236143Smm return (ztest_opts.zo_ashift); 814168404Spjd} 815168404Spjd 816168404Spjdstatic nvlist_t * 817243505Smmmake_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) 818168404Spjd{ 819185029Spjd char pathbuf[MAXPATHLEN]; 820168404Spjd uint64_t vdev; 821168404Spjd nvlist_t *file; 822168404Spjd 823185029Spjd if (ashift == 0) 824185029Spjd ashift = ztest_get_ashift(); 825168404Spjd 826185029Spjd if (path == NULL) { 827185029Spjd path = pathbuf; 828185029Spjd 829185029Spjd if (aux != NULL) { 830185029Spjd vdev = ztest_shared->zs_vdev_aux; 831236143Smm (void) snprintf(path, sizeof (pathbuf), 832236143Smm ztest_aux_template, ztest_opts.zo_dir, 833243505Smm pool == NULL ? ztest_opts.zo_pool : pool, 834243505Smm aux, vdev); 835185029Spjd } else { 836219089Spjd vdev = ztest_shared->zs_vdev_next_leaf++; 837236143Smm (void) snprintf(path, sizeof (pathbuf), 838236143Smm ztest_dev_template, ztest_opts.zo_dir, 839243505Smm pool == NULL ? ztest_opts.zo_pool : pool, vdev); 840185029Spjd } 841185029Spjd } 842185029Spjd 843185029Spjd if (size != 0) { 844185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 845168404Spjd if (fd == -1) 846185029Spjd fatal(1, "can't open %s", path); 847168404Spjd if (ftruncate(fd, size) != 0) 848185029Spjd fatal(1, "can't ftruncate %s", path); 849168404Spjd (void) close(fd); 850168404Spjd } 851168404Spjd 852168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 853168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 854185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 855168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 856168404Spjd 857168404Spjd return (file); 858168404Spjd} 859168404Spjd 860168404Spjdstatic nvlist_t * 861243505Smmmake_vdev_raidz(char *path, char *aux, char *pool, size_t size, 862243505Smm uint64_t ashift, int r) 863168404Spjd{ 864168404Spjd nvlist_t *raidz, **child; 865168404Spjd int c; 866168404Spjd 867168404Spjd if (r < 2) 868243505Smm return (make_vdev_file(path, aux, pool, size, ashift)); 869168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 870168404Spjd 871168404Spjd for (c = 0; c < r; c++) 872243505Smm child[c] = make_vdev_file(path, aux, pool, size, ashift); 873168404Spjd 874168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 875168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 876168404Spjd VDEV_TYPE_RAIDZ) == 0); 877168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 878236143Smm ztest_opts.zo_raidz_parity) == 0); 879168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 880168404Spjd child, r) == 0); 881168404Spjd 882168404Spjd for (c = 0; c < r; c++) 883168404Spjd nvlist_free(child[c]); 884168404Spjd 885168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 886168404Spjd 887168404Spjd return (raidz); 888168404Spjd} 889168404Spjd 890168404Spjdstatic nvlist_t * 891243505Smmmake_vdev_mirror(char *path, char *aux, char *pool, size_t size, 892243505Smm uint64_t ashift, int r, int m) 893168404Spjd{ 894168404Spjd nvlist_t *mirror, **child; 895168404Spjd int c; 896168404Spjd 897168404Spjd if (m < 1) 898243505Smm return (make_vdev_raidz(path, aux, pool, size, ashift, r)); 899168404Spjd 900168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 901168404Spjd 902168404Spjd for (c = 0; c < m; c++) 903243505Smm child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); 904168404Spjd 905168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 906168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 907168404Spjd VDEV_TYPE_MIRROR) == 0); 908168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 909168404Spjd child, m) == 0); 910168404Spjd 911168404Spjd for (c = 0; c < m; c++) 912168404Spjd nvlist_free(child[c]); 913168404Spjd 914168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 915168404Spjd 916168404Spjd return (mirror); 917168404Spjd} 918168404Spjd 919168404Spjdstatic nvlist_t * 920243505Smmmake_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, 921243505Smm int log, int r, int m, int t) 922168404Spjd{ 923168404Spjd nvlist_t *root, **child; 924168404Spjd int c; 925168404Spjd 926168404Spjd ASSERT(t > 0); 927168404Spjd 928168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); 929168404Spjd 930185029Spjd for (c = 0; c < t; c++) { 931243505Smm child[c] = make_vdev_mirror(path, aux, pool, size, ashift, 932243505Smm r, m); 933185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 934185029Spjd log) == 0); 935185029Spjd } 936168404Spjd 937168404Spjd VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); 938168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 939185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, 940168404Spjd child, t) == 0); 941168404Spjd 942168404Spjd for (c = 0; c < t; c++) 943168404Spjd nvlist_free(child[c]); 944168404Spjd 945168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 946168404Spjd 947168404Spjd return (root); 948168404Spjd} 949168404Spjd 950243505Smm/* 951243505Smm * Find a random spa version. Returns back a random spa version in the 952243505Smm * range [initial_version, SPA_VERSION_FEATURES]. 953243505Smm */ 954243505Smmstatic uint64_t 955243505Smmztest_random_spa_version(uint64_t initial_version) 956243505Smm{ 957243505Smm uint64_t version = initial_version; 958243505Smm 959243505Smm if (version <= SPA_VERSION_BEFORE_FEATURES) { 960243505Smm version = version + 961243505Smm ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); 962243505Smm } 963243505Smm 964243505Smm if (version > SPA_VERSION_BEFORE_FEATURES) 965243505Smm version = SPA_VERSION_FEATURES; 966243505Smm 967243505Smm ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 968243505Smm return (version); 969243505Smm} 970243505Smm 971219089Spjdstatic int 972219089Spjdztest_random_blocksize(void) 973219089Spjd{ 974219089Spjd return (1 << (SPA_MINBLOCKSHIFT + 975219089Spjd ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1))); 976219089Spjd} 977219089Spjd 978219089Spjdstatic int 979219089Spjdztest_random_ibshift(void) 980219089Spjd{ 981219089Spjd return (DN_MIN_INDBLKSHIFT + 982219089Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); 983219089Spjd} 984219089Spjd 985219089Spjdstatic uint64_t 986219089Spjdztest_random_vdev_top(spa_t *spa, boolean_t log_ok) 987219089Spjd{ 988219089Spjd uint64_t top; 989219089Spjd vdev_t *rvd = spa->spa_root_vdev; 990219089Spjd vdev_t *tvd; 991219089Spjd 992219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 993219089Spjd 994219089Spjd do { 995219089Spjd top = ztest_random(rvd->vdev_children); 996219089Spjd tvd = rvd->vdev_child[top]; 997219089Spjd } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || 998219089Spjd tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); 999219089Spjd 1000219089Spjd return (top); 1001219089Spjd} 1002219089Spjd 1003219089Spjdstatic uint64_t 1004219089Spjdztest_random_dsl_prop(zfs_prop_t prop) 1005219089Spjd{ 1006219089Spjd uint64_t value; 1007219089Spjd 1008219089Spjd do { 1009219089Spjd value = zfs_prop_random_value(prop, ztest_random(-1ULL)); 1010219089Spjd } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); 1011219089Spjd 1012219089Spjd return (value); 1013219089Spjd} 1014219089Spjd 1015219089Spjdstatic int 1016219089Spjdztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, 1017219089Spjd boolean_t inherit) 1018219089Spjd{ 1019219089Spjd const char *propname = zfs_prop_to_name(prop); 1020219089Spjd const char *valname; 1021219089Spjd char setpoint[MAXPATHLEN]; 1022219089Spjd uint64_t curval; 1023219089Spjd int error; 1024219089Spjd 1025248571Smm error = dsl_prop_set_int(osname, propname, 1026248571Smm (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); 1027219089Spjd 1028219089Spjd if (error == ENOSPC) { 1029219089Spjd ztest_record_enospc(FTAG); 1030219089Spjd return (error); 1031219089Spjd } 1032240415Smm ASSERT0(error); 1033219089Spjd 1034248571Smm VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); 1035219089Spjd 1036236143Smm if (ztest_opts.zo_verbose >= 6) { 1037219089Spjd VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); 1038219089Spjd (void) printf("%s %s = %s at '%s'\n", 1039219089Spjd osname, propname, valname, setpoint); 1040219089Spjd } 1041219089Spjd 1042219089Spjd return (error); 1043219089Spjd} 1044219089Spjd 1045219089Spjdstatic int 1046236143Smmztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 1047219089Spjd{ 1048236143Smm spa_t *spa = ztest_spa; 1049219089Spjd nvlist_t *props = NULL; 1050219089Spjd int error; 1051219089Spjd 1052219089Spjd VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 1053219089Spjd VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); 1054219089Spjd 1055219089Spjd error = spa_prop_set(spa, props); 1056219089Spjd 1057219089Spjd nvlist_free(props); 1058219089Spjd 1059219089Spjd if (error == ENOSPC) { 1060219089Spjd ztest_record_enospc(FTAG); 1061219089Spjd return (error); 1062219089Spjd } 1063240415Smm ASSERT0(error); 1064219089Spjd 1065219089Spjd return (error); 1066219089Spjd} 1067219089Spjd 1068168404Spjdstatic void 1069219089Spjdztest_rll_init(rll_t *rll) 1070168404Spjd{ 1071219089Spjd rll->rll_writer = NULL; 1072219089Spjd rll->rll_readers = 0; 1073219089Spjd VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); 1074219089Spjd VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); 1075219089Spjd} 1076219089Spjd 1077219089Spjdstatic void 1078219089Spjdztest_rll_destroy(rll_t *rll) 1079219089Spjd{ 1080219089Spjd ASSERT(rll->rll_writer == NULL); 1081219089Spjd ASSERT(rll->rll_readers == 0); 1082219089Spjd VERIFY(_mutex_destroy(&rll->rll_lock) == 0); 1083219089Spjd VERIFY(cond_destroy(&rll->rll_cv) == 0); 1084219089Spjd} 1085219089Spjd 1086219089Spjdstatic void 1087219089Spjdztest_rll_lock(rll_t *rll, rl_type_t type) 1088219089Spjd{ 1089219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1090219089Spjd 1091219089Spjd if (type == RL_READER) { 1092219089Spjd while (rll->rll_writer != NULL) 1093219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1094219089Spjd rll->rll_readers++; 1095219089Spjd } else { 1096219089Spjd while (rll->rll_writer != NULL || rll->rll_readers) 1097219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1098219089Spjd rll->rll_writer = curthread; 1099219089Spjd } 1100219089Spjd 1101219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1102219089Spjd} 1103219089Spjd 1104219089Spjdstatic void 1105219089Spjdztest_rll_unlock(rll_t *rll) 1106219089Spjd{ 1107219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1108219089Spjd 1109219089Spjd if (rll->rll_writer) { 1110219089Spjd ASSERT(rll->rll_readers == 0); 1111219089Spjd rll->rll_writer = NULL; 1112219089Spjd } else { 1113219089Spjd ASSERT(rll->rll_readers != 0); 1114219089Spjd ASSERT(rll->rll_writer == NULL); 1115219089Spjd rll->rll_readers--; 1116219089Spjd } 1117219089Spjd 1118219089Spjd if (rll->rll_writer == NULL && rll->rll_readers == 0) 1119219089Spjd VERIFY(cond_broadcast(&rll->rll_cv) == 0); 1120219089Spjd 1121219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1122219089Spjd} 1123219089Spjd 1124219089Spjdstatic void 1125219089Spjdztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) 1126219089Spjd{ 1127219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1128219089Spjd 1129219089Spjd ztest_rll_lock(rll, type); 1130219089Spjd} 1131219089Spjd 1132219089Spjdstatic void 1133219089Spjdztest_object_unlock(ztest_ds_t *zd, uint64_t object) 1134219089Spjd{ 1135219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1136219089Spjd 1137219089Spjd ztest_rll_unlock(rll); 1138219089Spjd} 1139219089Spjd 1140219089Spjdstatic rl_t * 1141219089Spjdztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, 1142219089Spjd uint64_t size, rl_type_t type) 1143219089Spjd{ 1144219089Spjd uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); 1145219089Spjd rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; 1146219089Spjd rl_t *rl; 1147219089Spjd 1148219089Spjd rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); 1149219089Spjd rl->rl_object = object; 1150219089Spjd rl->rl_offset = offset; 1151219089Spjd rl->rl_size = size; 1152219089Spjd rl->rl_lock = rll; 1153219089Spjd 1154219089Spjd ztest_rll_lock(rll, type); 1155219089Spjd 1156219089Spjd return (rl); 1157219089Spjd} 1158219089Spjd 1159219089Spjdstatic void 1160219089Spjdztest_range_unlock(rl_t *rl) 1161219089Spjd{ 1162219089Spjd rll_t *rll = rl->rl_lock; 1163219089Spjd 1164219089Spjd ztest_rll_unlock(rll); 1165219089Spjd 1166219089Spjd umem_free(rl, sizeof (*rl)); 1167219089Spjd} 1168219089Spjd 1169219089Spjdstatic void 1170236143Smmztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) 1171219089Spjd{ 1172219089Spjd zd->zd_os = os; 1173219089Spjd zd->zd_zilog = dmu_objset_zil(os); 1174236143Smm zd->zd_shared = szd; 1175219089Spjd dmu_objset_name(os, zd->zd_name); 1176219089Spjd 1177236143Smm if (zd->zd_shared != NULL) 1178236143Smm zd->zd_shared->zd_seq = 0; 1179236143Smm 1180224526Smm VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); 1181219089Spjd VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); 1182219089Spjd 1183219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1184219089Spjd ztest_rll_init(&zd->zd_object_lock[l]); 1185219089Spjd 1186219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1187219089Spjd ztest_rll_init(&zd->zd_range_lock[l]); 1188219089Spjd} 1189219089Spjd 1190219089Spjdstatic void 1191219089Spjdztest_zd_fini(ztest_ds_t *zd) 1192219089Spjd{ 1193219089Spjd VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); 1194219089Spjd 1195219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1196219089Spjd ztest_rll_destroy(&zd->zd_object_lock[l]); 1197219089Spjd 1198219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1199219089Spjd ztest_rll_destroy(&zd->zd_range_lock[l]); 1200219089Spjd} 1201219089Spjd 1202219089Spjd#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) 1203219089Spjd 1204219089Spjdstatic uint64_t 1205219089Spjdztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) 1206219089Spjd{ 1207219089Spjd uint64_t txg; 1208168404Spjd int error; 1209168404Spjd 1210219089Spjd /* 1211219089Spjd * Attempt to assign tx to some transaction group. 1212219089Spjd */ 1213219089Spjd error = dmu_tx_assign(tx, txg_how); 1214168404Spjd if (error) { 1215219089Spjd if (error == ERESTART) { 1216219089Spjd ASSERT(txg_how == TXG_NOWAIT); 1217219089Spjd dmu_tx_wait(tx); 1218219089Spjd } else { 1219219089Spjd ASSERT3U(error, ==, ENOSPC); 1220219089Spjd ztest_record_enospc(tag); 1221219089Spjd } 1222219089Spjd dmu_tx_abort(tx); 1223219089Spjd return (0); 1224168404Spjd } 1225219089Spjd txg = dmu_tx_get_txg(tx); 1226219089Spjd ASSERT(txg != 0); 1227219089Spjd return (txg); 1228168404Spjd} 1229168404Spjd 1230219089Spjdstatic void 1231219089Spjdztest_pattern_set(void *buf, uint64_t size, uint64_t value) 1232168404Spjd{ 1233219089Spjd uint64_t *ip = buf; 1234219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1235168404Spjd 1236219089Spjd while (ip < ip_end) 1237219089Spjd *ip++ = value; 1238219089Spjd} 1239168404Spjd 1240219089Spjdstatic boolean_t 1241219089Spjdztest_pattern_match(void *buf, uint64_t size, uint64_t value) 1242219089Spjd{ 1243219089Spjd uint64_t *ip = buf; 1244219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1245219089Spjd uint64_t diff = 0; 1246168404Spjd 1247219089Spjd while (ip < ip_end) 1248219089Spjd diff |= (value - *ip++); 1249219089Spjd 1250219089Spjd return (diff == 0); 1251168404Spjd} 1252168404Spjd 1253219089Spjdstatic void 1254219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1255219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1256168404Spjd{ 1257219089Spjd bt->bt_magic = BT_MAGIC; 1258219089Spjd bt->bt_objset = dmu_objset_id(os); 1259219089Spjd bt->bt_object = object; 1260219089Spjd bt->bt_offset = offset; 1261219089Spjd bt->bt_gen = gen; 1262219089Spjd bt->bt_txg = txg; 1263219089Spjd bt->bt_crtxg = crtxg; 1264168404Spjd} 1265168404Spjd 1266219089Spjdstatic void 1267219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1268219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1269219089Spjd{ 1270219089Spjd ASSERT(bt->bt_magic == BT_MAGIC); 1271219089Spjd ASSERT(bt->bt_objset == dmu_objset_id(os)); 1272219089Spjd ASSERT(bt->bt_object == object); 1273219089Spjd ASSERT(bt->bt_offset == offset); 1274219089Spjd ASSERT(bt->bt_gen <= gen); 1275219089Spjd ASSERT(bt->bt_txg <= txg); 1276219089Spjd ASSERT(bt->bt_crtxg == crtxg); 1277219089Spjd} 1278219089Spjd 1279219089Spjdstatic ztest_block_tag_t * 1280219089Spjdztest_bt_bonus(dmu_buf_t *db) 1281219089Spjd{ 1282219089Spjd dmu_object_info_t doi; 1283219089Spjd ztest_block_tag_t *bt; 1284219089Spjd 1285219089Spjd dmu_object_info_from_db(db, &doi); 1286219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1287219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1288219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1289219089Spjd 1290219089Spjd return (bt); 1291219089Spjd} 1292219089Spjd 1293219089Spjd/* 1294219089Spjd * ZIL logging ops 1295219089Spjd */ 1296219089Spjd 1297219089Spjd#define lrz_type lr_mode 1298219089Spjd#define lrz_blocksize lr_uid 1299219089Spjd#define lrz_ibshift lr_gid 1300219089Spjd#define lrz_bonustype lr_rdev 1301219089Spjd#define lrz_bonuslen lr_crtime[1] 1302219089Spjd 1303219089Spjdstatic void 1304219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1305219089Spjd{ 1306219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1307219089Spjd size_t namesize = strlen(name) + 1; 1308219089Spjd itx_t *itx; 1309219089Spjd 1310219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1311219089Spjd return; 1312219089Spjd 1313219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1314219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1315219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1316219089Spjd 1317219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1318219089Spjd} 1319219089Spjd 1320219089Spjdstatic void 1321219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1322219089Spjd{ 1323219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1324219089Spjd size_t namesize = strlen(name) + 1; 1325219089Spjd itx_t *itx; 1326219089Spjd 1327219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1328219089Spjd return; 1329219089Spjd 1330219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1331219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1332219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1333219089Spjd 1334219089Spjd itx->itx_oid = object; 1335219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1336219089Spjd} 1337219089Spjd 1338219089Spjdstatic void 1339219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1340219089Spjd{ 1341219089Spjd itx_t *itx; 1342219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1343219089Spjd 1344219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1345219089Spjd return; 1346219089Spjd 1347219089Spjd if (lr->lr_length > ZIL_MAX_LOG_DATA) 1348219089Spjd write_state = WR_INDIRECT; 1349219089Spjd 1350219089Spjd itx = zil_itx_create(TX_WRITE, 1351219089Spjd sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0)); 1352219089Spjd 1353219089Spjd if (write_state == WR_COPIED && 1354219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1355219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1356219089Spjd zil_itx_destroy(itx); 1357219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1358219089Spjd write_state = WR_NEED_COPY; 1359219089Spjd } 1360219089Spjd itx->itx_private = zd; 1361219089Spjd itx->itx_wr_state = write_state; 1362219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1363219089Spjd itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0); 1364219089Spjd 1365219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1366219089Spjd sizeof (*lr) - sizeof (lr_t)); 1367219089Spjd 1368219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1369219089Spjd} 1370219089Spjd 1371219089Spjdstatic void 1372219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1373219089Spjd{ 1374219089Spjd itx_t *itx; 1375219089Spjd 1376219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1377219089Spjd return; 1378219089Spjd 1379219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1380219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1381219089Spjd sizeof (*lr) - sizeof (lr_t)); 1382219089Spjd 1383219089Spjd itx->itx_sync = B_FALSE; 1384219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1385219089Spjd} 1386219089Spjd 1387219089Spjdstatic void 1388219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1389219089Spjd{ 1390219089Spjd itx_t *itx; 1391219089Spjd 1392219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1393219089Spjd return; 1394219089Spjd 1395219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1396219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1397219089Spjd sizeof (*lr) - sizeof (lr_t)); 1398219089Spjd 1399219089Spjd itx->itx_sync = B_FALSE; 1400219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1401219089Spjd} 1402219089Spjd 1403219089Spjd/* 1404219089Spjd * ZIL replay ops 1405219089Spjd */ 1406168404Spjdstatic int 1407219089Spjdztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) 1408168404Spjd{ 1409219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1410219089Spjd objset_t *os = zd->zd_os; 1411219089Spjd ztest_block_tag_t *bbt; 1412219089Spjd dmu_buf_t *db; 1413168404Spjd dmu_tx_t *tx; 1414219089Spjd uint64_t txg; 1415219089Spjd int error = 0; 1416168404Spjd 1417168404Spjd if (byteswap) 1418168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1419168404Spjd 1420219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1421219089Spjd ASSERT(name[0] != '\0'); 1422219089Spjd 1423168404Spjd tx = dmu_tx_create(os); 1424219089Spjd 1425219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1426219089Spjd 1427219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1428219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1429219089Spjd } else { 1430219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1431219089Spjd } 1432219089Spjd 1433219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1434219089Spjd if (txg == 0) 1435219089Spjd return (ENOSPC); 1436219089Spjd 1437219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1438219089Spjd 1439219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1440219089Spjd if (lr->lr_foid == 0) { 1441219089Spjd lr->lr_foid = zap_create(os, 1442219089Spjd lr->lrz_type, lr->lrz_bonustype, 1443219089Spjd lr->lrz_bonuslen, tx); 1444219089Spjd } else { 1445219089Spjd error = zap_create_claim(os, lr->lr_foid, 1446219089Spjd lr->lrz_type, lr->lrz_bonustype, 1447219089Spjd lr->lrz_bonuslen, tx); 1448219089Spjd } 1449219089Spjd } else { 1450219089Spjd if (lr->lr_foid == 0) { 1451219089Spjd lr->lr_foid = dmu_object_alloc(os, 1452219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1453219089Spjd lr->lrz_bonuslen, tx); 1454219089Spjd } else { 1455219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1456219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1457219089Spjd lr->lrz_bonuslen, tx); 1458219089Spjd } 1459219089Spjd } 1460219089Spjd 1461168404Spjd if (error) { 1462219089Spjd ASSERT3U(error, ==, EEXIST); 1463219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1464219089Spjd dmu_tx_commit(tx); 1465168404Spjd return (error); 1466168404Spjd } 1467168404Spjd 1468219089Spjd ASSERT(lr->lr_foid != 0); 1469219089Spjd 1470219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1471219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1472219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1473219089Spjd 1474219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1475219089Spjd bbt = ztest_bt_bonus(db); 1476219089Spjd dmu_buf_will_dirty(db, tx); 1477219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1478219089Spjd dmu_buf_rele(db, FTAG); 1479219089Spjd 1480219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1481219089Spjd &lr->lr_foid, tx)); 1482219089Spjd 1483219089Spjd (void) ztest_log_create(zd, tx, lr); 1484219089Spjd 1485168404Spjd dmu_tx_commit(tx); 1486168404Spjd 1487219089Spjd return (0); 1488219089Spjd} 1489219089Spjd 1490219089Spjdstatic int 1491219089Spjdztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) 1492219089Spjd{ 1493219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1494219089Spjd objset_t *os = zd->zd_os; 1495219089Spjd dmu_object_info_t doi; 1496219089Spjd dmu_tx_t *tx; 1497219089Spjd uint64_t object, txg; 1498219089Spjd 1499219089Spjd if (byteswap) 1500219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1501219089Spjd 1502219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1503219089Spjd ASSERT(name[0] != '\0'); 1504219089Spjd 1505219089Spjd VERIFY3U(0, ==, 1506219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); 1507219089Spjd ASSERT(object != 0); 1508219089Spjd 1509219089Spjd ztest_object_lock(zd, object, RL_WRITER); 1510219089Spjd 1511219089Spjd VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); 1512219089Spjd 1513219089Spjd tx = dmu_tx_create(os); 1514219089Spjd 1515219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1516219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1517219089Spjd 1518219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1519219089Spjd if (txg == 0) { 1520219089Spjd ztest_object_unlock(zd, object); 1521219089Spjd return (ENOSPC); 1522168404Spjd } 1523168404Spjd 1524219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1525219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1526219089Spjd } else { 1527219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1528219089Spjd } 1529219089Spjd 1530219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1531219089Spjd 1532219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1533219089Spjd 1534219089Spjd dmu_tx_commit(tx); 1535219089Spjd 1536219089Spjd ztest_object_unlock(zd, object); 1537219089Spjd 1538219089Spjd return (0); 1539168404Spjd} 1540168404Spjd 1541168404Spjdstatic int 1542219089Spjdztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) 1543168404Spjd{ 1544219089Spjd objset_t *os = zd->zd_os; 1545219089Spjd void *data = lr + 1; /* data follows lr */ 1546219089Spjd uint64_t offset, length; 1547219089Spjd ztest_block_tag_t *bt = data; 1548219089Spjd ztest_block_tag_t *bbt; 1549219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1550219089Spjd dmu_object_info_t doi; 1551168404Spjd dmu_tx_t *tx; 1552219089Spjd dmu_buf_t *db; 1553219089Spjd arc_buf_t *abuf = NULL; 1554219089Spjd rl_t *rl; 1555168404Spjd 1556168404Spjd if (byteswap) 1557168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1558168404Spjd 1559219089Spjd offset = lr->lr_offset; 1560219089Spjd length = lr->lr_length; 1561219089Spjd 1562219089Spjd /* If it's a dmu_sync() block, write the whole block */ 1563219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1564219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 1565219089Spjd if (length < blocksize) { 1566219089Spjd offset -= offset % blocksize; 1567219089Spjd length = blocksize; 1568219089Spjd } 1569219089Spjd } 1570219089Spjd 1571219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1572219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1573219089Spjd 1574219089Spjd if (bt->bt_magic != BT_MAGIC) 1575219089Spjd bt = NULL; 1576219089Spjd 1577219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1578219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1579219089Spjd 1580219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1581219089Spjd 1582219089Spjd dmu_object_info_from_db(db, &doi); 1583219089Spjd 1584219089Spjd bbt = ztest_bt_bonus(db); 1585219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1586219089Spjd gen = bbt->bt_gen; 1587219089Spjd crtxg = bbt->bt_crtxg; 1588219089Spjd lrtxg = lr->lr_common.lrc_txg; 1589219089Spjd 1590168404Spjd tx = dmu_tx_create(os); 1591219089Spjd 1592219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1593219089Spjd 1594219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1595219089Spjd P2PHASE(offset, length) == 0) 1596219089Spjd abuf = dmu_request_arcbuf(db, length); 1597219089Spjd 1598219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1599219089Spjd if (txg == 0) { 1600219089Spjd if (abuf != NULL) 1601219089Spjd dmu_return_arcbuf(abuf); 1602219089Spjd dmu_buf_rele(db, FTAG); 1603219089Spjd ztest_range_unlock(rl); 1604219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1605219089Spjd return (ENOSPC); 1606168404Spjd } 1607168404Spjd 1608219089Spjd if (bt != NULL) { 1609219089Spjd /* 1610219089Spjd * Usually, verify the old data before writing new data -- 1611219089Spjd * but not always, because we also want to verify correct 1612219089Spjd * behavior when the data was not recently read into cache. 1613219089Spjd */ 1614219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1615219089Spjd if (ztest_random(4) != 0) { 1616219089Spjd int prefetch = ztest_random(2) ? 1617219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1618219089Spjd ztest_block_tag_t rbt; 1619219089Spjd 1620219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1621219089Spjd sizeof (rbt), &rbt, prefetch) == 0); 1622219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1623219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1624219089Spjd offset, gen, txg, crtxg); 1625219089Spjd } 1626219089Spjd } 1627219089Spjd 1628219089Spjd /* 1629219089Spjd * Writes can appear to be newer than the bonus buffer because 1630219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1631219089Spjd * open-context data, which may be different than the data 1632219089Spjd * as it was when the write was generated. 1633219089Spjd */ 1634219089Spjd if (zd->zd_zilog->zl_replay) { 1635219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1636219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1637219089Spjd bt->bt_crtxg); 1638219089Spjd } 1639219089Spjd 1640219089Spjd /* 1641219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1642219089Spjd * so that all of the usual ASSERTs will work. 1643219089Spjd */ 1644219089Spjd ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); 1645219089Spjd } 1646219089Spjd 1647219089Spjd if (abuf == NULL) { 1648219089Spjd dmu_write(os, lr->lr_foid, offset, length, data, tx); 1649219089Spjd } else { 1650219089Spjd bcopy(data, abuf->b_data, length); 1651219089Spjd dmu_assign_arcbuf(db, offset, abuf, tx); 1652219089Spjd } 1653219089Spjd 1654219089Spjd (void) ztest_log_write(zd, tx, lr); 1655219089Spjd 1656219089Spjd dmu_buf_rele(db, FTAG); 1657219089Spjd 1658168404Spjd dmu_tx_commit(tx); 1659168404Spjd 1660219089Spjd ztest_range_unlock(rl); 1661219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1662219089Spjd 1663219089Spjd return (0); 1664168404Spjd} 1665168404Spjd 1666219089Spjdstatic int 1667219089Spjdztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) 1668219089Spjd{ 1669219089Spjd objset_t *os = zd->zd_os; 1670219089Spjd dmu_tx_t *tx; 1671219089Spjd uint64_t txg; 1672219089Spjd rl_t *rl; 1673219089Spjd 1674219089Spjd if (byteswap) 1675219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1676219089Spjd 1677219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1678219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, 1679219089Spjd RL_WRITER); 1680219089Spjd 1681219089Spjd tx = dmu_tx_create(os); 1682219089Spjd 1683219089Spjd dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); 1684219089Spjd 1685219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1686219089Spjd if (txg == 0) { 1687219089Spjd ztest_range_unlock(rl); 1688219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1689219089Spjd return (ENOSPC); 1690219089Spjd } 1691219089Spjd 1692219089Spjd VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, 1693219089Spjd lr->lr_length, tx) == 0); 1694219089Spjd 1695219089Spjd (void) ztest_log_truncate(zd, tx, lr); 1696219089Spjd 1697219089Spjd dmu_tx_commit(tx); 1698219089Spjd 1699219089Spjd ztest_range_unlock(rl); 1700219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1701219089Spjd 1702219089Spjd return (0); 1703219089Spjd} 1704219089Spjd 1705219089Spjdstatic int 1706219089Spjdztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) 1707219089Spjd{ 1708219089Spjd objset_t *os = zd->zd_os; 1709219089Spjd dmu_tx_t *tx; 1710219089Spjd dmu_buf_t *db; 1711219089Spjd ztest_block_tag_t *bbt; 1712219089Spjd uint64_t txg, lrtxg, crtxg; 1713219089Spjd 1714219089Spjd if (byteswap) 1715219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1716219089Spjd 1717219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_WRITER); 1718219089Spjd 1719219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1720219089Spjd 1721219089Spjd tx = dmu_tx_create(os); 1722219089Spjd dmu_tx_hold_bonus(tx, lr->lr_foid); 1723219089Spjd 1724219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1725219089Spjd if (txg == 0) { 1726219089Spjd dmu_buf_rele(db, FTAG); 1727219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1728219089Spjd return (ENOSPC); 1729219089Spjd } 1730219089Spjd 1731219089Spjd bbt = ztest_bt_bonus(db); 1732219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1733219089Spjd crtxg = bbt->bt_crtxg; 1734219089Spjd lrtxg = lr->lr_common.lrc_txg; 1735219089Spjd 1736219089Spjd if (zd->zd_zilog->zl_replay) { 1737219089Spjd ASSERT(lr->lr_size != 0); 1738219089Spjd ASSERT(lr->lr_mode != 0); 1739219089Spjd ASSERT(lrtxg != 0); 1740219089Spjd } else { 1741219089Spjd /* 1742219089Spjd * Randomly change the size and increment the generation. 1743219089Spjd */ 1744219089Spjd lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * 1745219089Spjd sizeof (*bbt); 1746219089Spjd lr->lr_mode = bbt->bt_gen + 1; 1747219089Spjd ASSERT(lrtxg == 0); 1748219089Spjd } 1749219089Spjd 1750219089Spjd /* 1751219089Spjd * Verify that the current bonus buffer is not newer than our txg. 1752219089Spjd */ 1753219089Spjd ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, 1754219089Spjd MAX(txg, lrtxg), crtxg); 1755219089Spjd 1756219089Spjd dmu_buf_will_dirty(db, tx); 1757219089Spjd 1758219089Spjd ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); 1759219089Spjd ASSERT3U(lr->lr_size, <=, db->db_size); 1760240415Smm VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); 1761219089Spjd bbt = ztest_bt_bonus(db); 1762219089Spjd 1763219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); 1764219089Spjd 1765219089Spjd dmu_buf_rele(db, FTAG); 1766219089Spjd 1767219089Spjd (void) ztest_log_setattr(zd, tx, lr); 1768219089Spjd 1769219089Spjd dmu_tx_commit(tx); 1770219089Spjd 1771219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1772219089Spjd 1773219089Spjd return (0); 1774219089Spjd} 1775219089Spjd 1776168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 1777168404Spjd NULL, /* 0 no such transaction type */ 1778168404Spjd ztest_replay_create, /* TX_CREATE */ 1779168404Spjd NULL, /* TX_MKDIR */ 1780168404Spjd NULL, /* TX_MKXATTR */ 1781168404Spjd NULL, /* TX_SYMLINK */ 1782168404Spjd ztest_replay_remove, /* TX_REMOVE */ 1783168404Spjd NULL, /* TX_RMDIR */ 1784168404Spjd NULL, /* TX_LINK */ 1785168404Spjd NULL, /* TX_RENAME */ 1786219089Spjd ztest_replay_write, /* TX_WRITE */ 1787219089Spjd ztest_replay_truncate, /* TX_TRUNCATE */ 1788219089Spjd ztest_replay_setattr, /* TX_SETATTR */ 1789168404Spjd NULL, /* TX_ACL */ 1790209962Smm NULL, /* TX_CREATE_ACL */ 1791209962Smm NULL, /* TX_CREATE_ATTR */ 1792209962Smm NULL, /* TX_CREATE_ACL_ATTR */ 1793209962Smm NULL, /* TX_MKDIR_ACL */ 1794209962Smm NULL, /* TX_MKDIR_ATTR */ 1795209962Smm NULL, /* TX_MKDIR_ACL_ATTR */ 1796209962Smm NULL, /* TX_WRITE2 */ 1797168404Spjd}; 1798168404Spjd 1799168404Spjd/* 1800219089Spjd * ZIL get_data callbacks 1801219089Spjd */ 1802219089Spjd 1803219089Spjdstatic void 1804219089Spjdztest_get_done(zgd_t *zgd, int error) 1805219089Spjd{ 1806219089Spjd ztest_ds_t *zd = zgd->zgd_private; 1807219089Spjd uint64_t object = zgd->zgd_rl->rl_object; 1808219089Spjd 1809219089Spjd if (zgd->zgd_db) 1810219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1811219089Spjd 1812219089Spjd ztest_range_unlock(zgd->zgd_rl); 1813219089Spjd ztest_object_unlock(zd, object); 1814219089Spjd 1815219089Spjd if (error == 0 && zgd->zgd_bp) 1816219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1817219089Spjd 1818219089Spjd umem_free(zgd, sizeof (*zgd)); 1819219089Spjd} 1820219089Spjd 1821219089Spjdstatic int 1822219089Spjdztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1823219089Spjd{ 1824219089Spjd ztest_ds_t *zd = arg; 1825219089Spjd objset_t *os = zd->zd_os; 1826219089Spjd uint64_t object = lr->lr_foid; 1827219089Spjd uint64_t offset = lr->lr_offset; 1828219089Spjd uint64_t size = lr->lr_length; 1829219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1830219089Spjd uint64_t txg = lr->lr_common.lrc_txg; 1831219089Spjd uint64_t crtxg; 1832219089Spjd dmu_object_info_t doi; 1833219089Spjd dmu_buf_t *db; 1834219089Spjd zgd_t *zgd; 1835219089Spjd int error; 1836219089Spjd 1837219089Spjd ztest_object_lock(zd, object, RL_READER); 1838219089Spjd error = dmu_bonus_hold(os, object, FTAG, &db); 1839219089Spjd if (error) { 1840219089Spjd ztest_object_unlock(zd, object); 1841219089Spjd return (error); 1842219089Spjd } 1843219089Spjd 1844219089Spjd crtxg = ztest_bt_bonus(db)->bt_crtxg; 1845219089Spjd 1846219089Spjd if (crtxg == 0 || crtxg > txg) { 1847219089Spjd dmu_buf_rele(db, FTAG); 1848219089Spjd ztest_object_unlock(zd, object); 1849219089Spjd return (ENOENT); 1850219089Spjd } 1851219089Spjd 1852219089Spjd dmu_object_info_from_db(db, &doi); 1853219089Spjd dmu_buf_rele(db, FTAG); 1854219089Spjd db = NULL; 1855219089Spjd 1856219089Spjd zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); 1857219089Spjd zgd->zgd_zilog = zd->zd_zilog; 1858219089Spjd zgd->zgd_private = zd; 1859219089Spjd 1860219089Spjd if (buf != NULL) { /* immediate write */ 1861219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1862219089Spjd RL_READER); 1863219089Spjd 1864219089Spjd error = dmu_read(os, object, offset, size, buf, 1865219089Spjd DMU_READ_NO_PREFETCH); 1866219089Spjd ASSERT(error == 0); 1867219089Spjd } else { 1868219089Spjd size = doi.doi_data_block_size; 1869219089Spjd if (ISP2(size)) { 1870219089Spjd offset = P2ALIGN(offset, size); 1871219089Spjd } else { 1872219089Spjd ASSERT(offset < size); 1873219089Spjd offset = 0; 1874219089Spjd } 1875219089Spjd 1876219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1877219089Spjd RL_READER); 1878219089Spjd 1879219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1880219089Spjd DMU_READ_NO_PREFETCH); 1881219089Spjd 1882219089Spjd if (error == 0) { 1883243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1884243524Smm if (obp) { 1885243524Smm ASSERT(BP_IS_HOLE(bp)); 1886243524Smm *bp = *obp; 1887243524Smm } 1888243524Smm 1889219089Spjd zgd->zgd_db = db; 1890219089Spjd zgd->zgd_bp = bp; 1891219089Spjd 1892219089Spjd ASSERT(db->db_offset == offset); 1893219089Spjd ASSERT(db->db_size == size); 1894219089Spjd 1895219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1896219089Spjd ztest_get_done, zgd); 1897219089Spjd 1898219089Spjd if (error == 0) 1899219089Spjd return (0); 1900219089Spjd } 1901219089Spjd } 1902219089Spjd 1903219089Spjd ztest_get_done(zgd, error); 1904219089Spjd 1905219089Spjd return (error); 1906219089Spjd} 1907219089Spjd 1908219089Spjdstatic void * 1909219089Spjdztest_lr_alloc(size_t lrsize, char *name) 1910219089Spjd{ 1911219089Spjd char *lr; 1912219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1913219089Spjd 1914219089Spjd lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); 1915219089Spjd 1916219089Spjd if (name) 1917219089Spjd bcopy(name, lr + lrsize, namesize); 1918219089Spjd 1919219089Spjd return (lr); 1920219089Spjd} 1921219089Spjd 1922219089Spjdvoid 1923219089Spjdztest_lr_free(void *lr, size_t lrsize, char *name) 1924219089Spjd{ 1925219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1926219089Spjd 1927219089Spjd umem_free(lr, lrsize + namesize); 1928219089Spjd} 1929219089Spjd 1930219089Spjd/* 1931219089Spjd * Lookup a bunch of objects. Returns the number of objects not found. 1932219089Spjd */ 1933219089Spjdstatic int 1934219089Spjdztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) 1935219089Spjd{ 1936219089Spjd int missing = 0; 1937219089Spjd int error; 1938219089Spjd 1939219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1940219089Spjd 1941219089Spjd for (int i = 0; i < count; i++, od++) { 1942219089Spjd od->od_object = 0; 1943219089Spjd error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, 1944219089Spjd sizeof (uint64_t), 1, &od->od_object); 1945219089Spjd if (error) { 1946219089Spjd ASSERT(error == ENOENT); 1947219089Spjd ASSERT(od->od_object == 0); 1948219089Spjd missing++; 1949219089Spjd } else { 1950219089Spjd dmu_buf_t *db; 1951219089Spjd ztest_block_tag_t *bbt; 1952219089Spjd dmu_object_info_t doi; 1953219089Spjd 1954219089Spjd ASSERT(od->od_object != 0); 1955219089Spjd ASSERT(missing == 0); /* there should be no gaps */ 1956219089Spjd 1957219089Spjd ztest_object_lock(zd, od->od_object, RL_READER); 1958219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, 1959219089Spjd od->od_object, FTAG, &db)); 1960219089Spjd dmu_object_info_from_db(db, &doi); 1961219089Spjd bbt = ztest_bt_bonus(db); 1962219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1963219089Spjd od->od_type = doi.doi_type; 1964219089Spjd od->od_blocksize = doi.doi_data_block_size; 1965219089Spjd od->od_gen = bbt->bt_gen; 1966219089Spjd dmu_buf_rele(db, FTAG); 1967219089Spjd ztest_object_unlock(zd, od->od_object); 1968219089Spjd } 1969219089Spjd } 1970219089Spjd 1971219089Spjd return (missing); 1972219089Spjd} 1973219089Spjd 1974219089Spjdstatic int 1975219089Spjdztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) 1976219089Spjd{ 1977219089Spjd int missing = 0; 1978219089Spjd 1979219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1980219089Spjd 1981219089Spjd for (int i = 0; i < count; i++, od++) { 1982219089Spjd if (missing) { 1983219089Spjd od->od_object = 0; 1984219089Spjd missing++; 1985219089Spjd continue; 1986219089Spjd } 1987219089Spjd 1988219089Spjd lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 1989219089Spjd 1990219089Spjd lr->lr_doid = od->od_dir; 1991219089Spjd lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ 1992219089Spjd lr->lrz_type = od->od_crtype; 1993219089Spjd lr->lrz_blocksize = od->od_crblocksize; 1994219089Spjd lr->lrz_ibshift = ztest_random_ibshift(); 1995219089Spjd lr->lrz_bonustype = DMU_OT_UINT64_OTHER; 1996219089Spjd lr->lrz_bonuslen = dmu_bonus_max(); 1997219089Spjd lr->lr_gen = od->od_crgen; 1998219089Spjd lr->lr_crtime[0] = time(NULL); 1999219089Spjd 2000219089Spjd if (ztest_replay_create(zd, lr, B_FALSE) != 0) { 2001219089Spjd ASSERT(missing == 0); 2002219089Spjd od->od_object = 0; 2003219089Spjd missing++; 2004219089Spjd } else { 2005219089Spjd od->od_object = lr->lr_foid; 2006219089Spjd od->od_type = od->od_crtype; 2007219089Spjd od->od_blocksize = od->od_crblocksize; 2008219089Spjd od->od_gen = od->od_crgen; 2009219089Spjd ASSERT(od->od_object != 0); 2010219089Spjd } 2011219089Spjd 2012219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2013219089Spjd } 2014219089Spjd 2015219089Spjd return (missing); 2016219089Spjd} 2017219089Spjd 2018219089Spjdstatic int 2019219089Spjdztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) 2020219089Spjd{ 2021219089Spjd int missing = 0; 2022219089Spjd int error; 2023219089Spjd 2024219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 2025219089Spjd 2026219089Spjd od += count - 1; 2027219089Spjd 2028219089Spjd for (int i = count - 1; i >= 0; i--, od--) { 2029219089Spjd if (missing) { 2030219089Spjd missing++; 2031219089Spjd continue; 2032219089Spjd } 2033219089Spjd 2034243524Smm /* 2035243524Smm * No object was found. 2036243524Smm */ 2037219089Spjd if (od->od_object == 0) 2038219089Spjd continue; 2039219089Spjd 2040219089Spjd lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 2041219089Spjd 2042219089Spjd lr->lr_doid = od->od_dir; 2043219089Spjd 2044219089Spjd if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { 2045219089Spjd ASSERT3U(error, ==, ENOSPC); 2046219089Spjd missing++; 2047219089Spjd } else { 2048219089Spjd od->od_object = 0; 2049219089Spjd } 2050219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2051219089Spjd } 2052219089Spjd 2053219089Spjd return (missing); 2054219089Spjd} 2055219089Spjd 2056219089Spjdstatic int 2057219089Spjdztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, 2058219089Spjd void *data) 2059219089Spjd{ 2060219089Spjd lr_write_t *lr; 2061219089Spjd int error; 2062219089Spjd 2063219089Spjd lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); 2064219089Spjd 2065219089Spjd lr->lr_foid = object; 2066219089Spjd lr->lr_offset = offset; 2067219089Spjd lr->lr_length = size; 2068219089Spjd lr->lr_blkoff = 0; 2069219089Spjd BP_ZERO(&lr->lr_blkptr); 2070219089Spjd 2071219089Spjd bcopy(data, lr + 1, size); 2072219089Spjd 2073219089Spjd error = ztest_replay_write(zd, lr, B_FALSE); 2074219089Spjd 2075219089Spjd ztest_lr_free(lr, sizeof (*lr) + size, NULL); 2076219089Spjd 2077219089Spjd return (error); 2078219089Spjd} 2079219089Spjd 2080219089Spjdstatic int 2081219089Spjdztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2082219089Spjd{ 2083219089Spjd lr_truncate_t *lr; 2084219089Spjd int error; 2085219089Spjd 2086219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2087219089Spjd 2088219089Spjd lr->lr_foid = object; 2089219089Spjd lr->lr_offset = offset; 2090219089Spjd lr->lr_length = size; 2091219089Spjd 2092219089Spjd error = ztest_replay_truncate(zd, lr, B_FALSE); 2093219089Spjd 2094219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2095219089Spjd 2096219089Spjd return (error); 2097219089Spjd} 2098219089Spjd 2099219089Spjdstatic int 2100219089Spjdztest_setattr(ztest_ds_t *zd, uint64_t object) 2101219089Spjd{ 2102219089Spjd lr_setattr_t *lr; 2103219089Spjd int error; 2104219089Spjd 2105219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2106219089Spjd 2107219089Spjd lr->lr_foid = object; 2108219089Spjd lr->lr_size = 0; 2109219089Spjd lr->lr_mode = 0; 2110219089Spjd 2111219089Spjd error = ztest_replay_setattr(zd, lr, B_FALSE); 2112219089Spjd 2113219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2114219089Spjd 2115219089Spjd return (error); 2116219089Spjd} 2117219089Spjd 2118219089Spjdstatic void 2119219089Spjdztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2120219089Spjd{ 2121219089Spjd objset_t *os = zd->zd_os; 2122219089Spjd dmu_tx_t *tx; 2123219089Spjd uint64_t txg; 2124219089Spjd rl_t *rl; 2125219089Spjd 2126219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 2127219089Spjd 2128219089Spjd ztest_object_lock(zd, object, RL_READER); 2129219089Spjd rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); 2130219089Spjd 2131219089Spjd tx = dmu_tx_create(os); 2132219089Spjd 2133219089Spjd dmu_tx_hold_write(tx, object, offset, size); 2134219089Spjd 2135219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 2136219089Spjd 2137219089Spjd if (txg != 0) { 2138219089Spjd dmu_prealloc(os, object, offset, size, tx); 2139219089Spjd dmu_tx_commit(tx); 2140219089Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2141219089Spjd } else { 2142219089Spjd (void) dmu_free_long_range(os, object, offset, size); 2143219089Spjd } 2144219089Spjd 2145219089Spjd ztest_range_unlock(rl); 2146219089Spjd ztest_object_unlock(zd, object); 2147219089Spjd} 2148219089Spjd 2149219089Spjdstatic void 2150219089Spjdztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) 2151219089Spjd{ 2152243524Smm int err; 2153219089Spjd ztest_block_tag_t wbt; 2154219089Spjd dmu_object_info_t doi; 2155219089Spjd enum ztest_io_type io_type; 2156219089Spjd uint64_t blocksize; 2157219089Spjd void *data; 2158219089Spjd 2159219089Spjd VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); 2160219089Spjd blocksize = doi.doi_data_block_size; 2161219089Spjd data = umem_alloc(blocksize, UMEM_NOFAIL); 2162219089Spjd 2163219089Spjd /* 2164219089Spjd * Pick an i/o type at random, biased toward writing block tags. 2165219089Spjd */ 2166219089Spjd io_type = ztest_random(ZTEST_IO_TYPES); 2167219089Spjd if (ztest_random(2) == 0) 2168219089Spjd io_type = ZTEST_IO_WRITE_TAG; 2169219089Spjd 2170224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2171224526Smm 2172219089Spjd switch (io_type) { 2173219089Spjd 2174219089Spjd case ZTEST_IO_WRITE_TAG: 2175219089Spjd ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); 2176219089Spjd (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); 2177219089Spjd break; 2178219089Spjd 2179219089Spjd case ZTEST_IO_WRITE_PATTERN: 2180219089Spjd (void) memset(data, 'a' + (object + offset) % 5, blocksize); 2181219089Spjd if (ztest_random(2) == 0) { 2182219089Spjd /* 2183219089Spjd * Induce fletcher2 collisions to ensure that 2184219089Spjd * zio_ddt_collision() detects and resolves them 2185219089Spjd * when using fletcher2-verify for deduplication. 2186219089Spjd */ 2187219089Spjd ((uint64_t *)data)[0] ^= 1ULL << 63; 2188219089Spjd ((uint64_t *)data)[4] ^= 1ULL << 63; 2189219089Spjd } 2190219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2191219089Spjd break; 2192219089Spjd 2193219089Spjd case ZTEST_IO_WRITE_ZEROES: 2194219089Spjd bzero(data, blocksize); 2195219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2196219089Spjd break; 2197219089Spjd 2198219089Spjd case ZTEST_IO_TRUNCATE: 2199219089Spjd (void) ztest_truncate(zd, object, offset, blocksize); 2200219089Spjd break; 2201219089Spjd 2202219089Spjd case ZTEST_IO_SETATTR: 2203219089Spjd (void) ztest_setattr(zd, object); 2204219089Spjd break; 2205243524Smm 2206243524Smm case ZTEST_IO_REWRITE: 2207243524Smm (void) rw_rdlock(&ztest_name_lock); 2208243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2209243524Smm ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), 2210243524Smm B_FALSE); 2211243524Smm VERIFY(err == 0 || err == ENOSPC); 2212243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2213243524Smm ZFS_PROP_COMPRESSION, 2214243524Smm ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), 2215243524Smm B_FALSE); 2216243524Smm VERIFY(err == 0 || err == ENOSPC); 2217243524Smm (void) rw_unlock(&ztest_name_lock); 2218243524Smm 2219243524Smm VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, 2220243524Smm DMU_READ_NO_PREFETCH)); 2221243524Smm 2222243524Smm (void) ztest_write(zd, object, offset, blocksize, data); 2223243524Smm break; 2224219089Spjd } 2225219089Spjd 2226224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2227224526Smm 2228219089Spjd umem_free(data, blocksize); 2229219089Spjd} 2230219089Spjd 2231219089Spjd/* 2232219089Spjd * Initialize an object description template. 2233219089Spjd */ 2234219089Spjdstatic void 2235219089Spjdztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, 2236219089Spjd dmu_object_type_t type, uint64_t blocksize, uint64_t gen) 2237219089Spjd{ 2238219089Spjd od->od_dir = ZTEST_DIROBJ; 2239219089Spjd od->od_object = 0; 2240219089Spjd 2241219089Spjd od->od_crtype = type; 2242219089Spjd od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); 2243219089Spjd od->od_crgen = gen; 2244219089Spjd 2245219089Spjd od->od_type = DMU_OT_NONE; 2246219089Spjd od->od_blocksize = 0; 2247219089Spjd od->od_gen = 0; 2248219089Spjd 2249219089Spjd (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", 2250219089Spjd tag, (int64_t)id, index); 2251219089Spjd} 2252219089Spjd 2253219089Spjd/* 2254219089Spjd * Lookup or create the objects for a test using the od template. 2255219089Spjd * If the objects do not all exist, or if 'remove' is specified, 2256219089Spjd * remove any existing objects and create new ones. Otherwise, 2257219089Spjd * use the existing objects. 2258219089Spjd */ 2259219089Spjdstatic int 2260219089Spjdztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) 2261219089Spjd{ 2262219089Spjd int count = size / sizeof (*od); 2263219089Spjd int rv = 0; 2264219089Spjd 2265219089Spjd VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); 2266219089Spjd if ((ztest_lookup(zd, od, count) != 0 || remove) && 2267219089Spjd (ztest_remove(zd, od, count) != 0 || 2268219089Spjd ztest_create(zd, od, count) != 0)) 2269219089Spjd rv = -1; 2270219089Spjd zd->zd_od = od; 2271219089Spjd VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2272219089Spjd 2273219089Spjd return (rv); 2274219089Spjd} 2275219089Spjd 2276219089Spjd/* ARGSUSED */ 2277219089Spjdvoid 2278219089Spjdztest_zil_commit(ztest_ds_t *zd, uint64_t id) 2279219089Spjd{ 2280219089Spjd zilog_t *zilog = zd->zd_zilog; 2281219089Spjd 2282224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2283224526Smm 2284219089Spjd zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); 2285219089Spjd 2286219089Spjd /* 2287219089Spjd * Remember the committed values in zd, which is in parent/child 2288219089Spjd * shared memory. If we die, the next iteration of ztest_run() 2289219089Spjd * will verify that the log really does contain this record. 2290219089Spjd */ 2291219089Spjd mutex_enter(&zilog->zl_lock); 2292236143Smm ASSERT(zd->zd_shared != NULL); 2293236143Smm ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); 2294236143Smm zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; 2295219089Spjd mutex_exit(&zilog->zl_lock); 2296224526Smm 2297224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2298219089Spjd} 2299219089Spjd 2300219089Spjd/* 2301224526Smm * This function is designed to simulate the operations that occur during a 2302224526Smm * mount/unmount operation. We hold the dataset across these operations in an 2303224526Smm * attempt to expose any implicit assumptions about ZIL management. 2304224526Smm */ 2305224526Smm/* ARGSUSED */ 2306224526Smmvoid 2307224526Smmztest_zil_remount(ztest_ds_t *zd, uint64_t id) 2308224526Smm{ 2309224526Smm objset_t *os = zd->zd_os; 2310224526Smm 2311243524Smm /* 2312243524Smm * We grab the zd_dirobj_lock to ensure that no other thread is 2313243524Smm * updating the zil (i.e. adding in-memory log records) and the 2314243524Smm * zd_zilog_lock to block any I/O. 2315243524Smm */ 2316243524Smm VERIFY0(mutex_lock(&zd->zd_dirobj_lock)); 2317224526Smm (void) rw_wrlock(&zd->zd_zilog_lock); 2318224526Smm 2319224526Smm /* zfsvfs_teardown() */ 2320224526Smm zil_close(zd->zd_zilog); 2321224526Smm 2322224526Smm /* zfsvfs_setup() */ 2323224526Smm VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); 2324224526Smm zil_replay(os, zd, ztest_replay_vector); 2325224526Smm 2326224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2327239620Smm VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2328224526Smm} 2329224526Smm 2330224526Smm/* 2331168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 2332168404Spjd * or create a pool with a bad vdev spec. 2333168404Spjd */ 2334219089Spjd/* ARGSUSED */ 2335168404Spjdvoid 2336219089Spjdztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) 2337168404Spjd{ 2338236143Smm ztest_shared_opts_t *zo = &ztest_opts; 2339168404Spjd spa_t *spa; 2340168404Spjd nvlist_t *nvroot; 2341168404Spjd 2342168404Spjd /* 2343168404Spjd * Attempt to create using a bad file. 2344168404Spjd */ 2345243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2346219089Spjd VERIFY3U(ENOENT, ==, 2347248571Smm spa_create("ztest_bad_file", nvroot, NULL, NULL)); 2348168404Spjd nvlist_free(nvroot); 2349168404Spjd 2350168404Spjd /* 2351168404Spjd * Attempt to create using a bad mirror. 2352168404Spjd */ 2353243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); 2354219089Spjd VERIFY3U(ENOENT, ==, 2355248571Smm spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); 2356168404Spjd nvlist_free(nvroot); 2357168404Spjd 2358168404Spjd /* 2359168404Spjd * Attempt to create an existing pool. It shouldn't matter 2360168404Spjd * what's in the nvroot; we should fail with EEXIST. 2361168404Spjd */ 2362236143Smm (void) rw_rdlock(&ztest_name_lock); 2363243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2364248571Smm VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); 2365168404Spjd nvlist_free(nvroot); 2366236143Smm VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); 2367236143Smm VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); 2368219089Spjd spa_close(spa, FTAG); 2369168404Spjd 2370236143Smm (void) rw_unlock(&ztest_name_lock); 2371168404Spjd} 2372168404Spjd 2373243505Smm/* ARGSUSED */ 2374243505Smmvoid 2375243505Smmztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) 2376243505Smm{ 2377243505Smm spa_t *spa; 2378243505Smm uint64_t initial_version = SPA_VERSION_INITIAL; 2379243505Smm uint64_t version, newversion; 2380243505Smm nvlist_t *nvroot, *props; 2381243505Smm char *name; 2382243505Smm 2383243505Smm VERIFY0(mutex_lock(&ztest_vdev_lock)); 2384243505Smm name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); 2385243505Smm 2386243505Smm /* 2387243505Smm * Clean up from previous runs. 2388243505Smm */ 2389243505Smm (void) spa_destroy(name); 2390243505Smm 2391243505Smm nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, 2392243505Smm 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); 2393243505Smm 2394243505Smm /* 2395243505Smm * If we're configuring a RAIDZ device then make sure that the 2396243505Smm * the initial version is capable of supporting that feature. 2397243505Smm */ 2398243505Smm switch (ztest_opts.zo_raidz_parity) { 2399243505Smm case 0: 2400243505Smm case 1: 2401243505Smm initial_version = SPA_VERSION_INITIAL; 2402243505Smm break; 2403243505Smm case 2: 2404243505Smm initial_version = SPA_VERSION_RAIDZ2; 2405243505Smm break; 2406243505Smm case 3: 2407243505Smm initial_version = SPA_VERSION_RAIDZ3; 2408243505Smm break; 2409243505Smm } 2410243505Smm 2411243505Smm /* 2412243505Smm * Create a pool with a spa version that can be upgraded. Pick 2413243505Smm * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. 2414243505Smm */ 2415243505Smm do { 2416243505Smm version = ztest_random_spa_version(initial_version); 2417243505Smm } while (version > SPA_VERSION_BEFORE_FEATURES); 2418243505Smm 2419243505Smm props = fnvlist_alloc(); 2420243505Smm fnvlist_add_uint64(props, 2421243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION), version); 2422248571Smm VERIFY0(spa_create(name, nvroot, props, NULL)); 2423243505Smm fnvlist_free(nvroot); 2424243505Smm fnvlist_free(props); 2425243505Smm 2426243505Smm VERIFY0(spa_open(name, &spa, FTAG)); 2427243505Smm VERIFY3U(spa_version(spa), ==, version); 2428243505Smm newversion = ztest_random_spa_version(version + 1); 2429243505Smm 2430243505Smm if (ztest_opts.zo_verbose >= 4) { 2431243505Smm (void) printf("upgrading spa version from %llu to %llu\n", 2432243505Smm (u_longlong_t)version, (u_longlong_t)newversion); 2433243505Smm } 2434243505Smm 2435243505Smm spa_upgrade(spa, newversion); 2436243505Smm VERIFY3U(spa_version(spa), >, version); 2437243505Smm VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, 2438243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION))); 2439243505Smm spa_close(spa, FTAG); 2440243505Smm 2441243505Smm strfree(name); 2442243505Smm VERIFY0(mutex_unlock(&ztest_vdev_lock)); 2443243505Smm} 2444243505Smm 2445185029Spjdstatic vdev_t * 2446185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 2447185029Spjd{ 2448185029Spjd vdev_t *mvd; 2449185029Spjd 2450185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 2451185029Spjd return (vd); 2452185029Spjd 2453185029Spjd for (int c = 0; c < vd->vdev_children; c++) 2454185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 2455185029Spjd NULL) 2456185029Spjd return (mvd); 2457185029Spjd 2458185029Spjd return (NULL); 2459185029Spjd} 2460185029Spjd 2461168404Spjd/* 2462219089Spjd * Find the first available hole which can be used as a top-level. 2463219089Spjd */ 2464219089Spjdint 2465219089Spjdfind_vdev_hole(spa_t *spa) 2466219089Spjd{ 2467219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2468219089Spjd int c; 2469219089Spjd 2470219089Spjd ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); 2471219089Spjd 2472219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 2473219089Spjd vdev_t *cvd = rvd->vdev_child[c]; 2474219089Spjd 2475219089Spjd if (cvd->vdev_ishole) 2476219089Spjd break; 2477219089Spjd } 2478219089Spjd return (c); 2479219089Spjd} 2480219089Spjd 2481219089Spjd/* 2482168404Spjd * Verify that vdev_add() works as expected. 2483168404Spjd */ 2484219089Spjd/* ARGSUSED */ 2485168404Spjdvoid 2486219089Spjdztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) 2487168404Spjd{ 2488219089Spjd ztest_shared_t *zs = ztest_shared; 2489236143Smm spa_t *spa = ztest_spa; 2490219089Spjd uint64_t leaves; 2491219089Spjd uint64_t guid; 2492168404Spjd nvlist_t *nvroot; 2493168404Spjd int error; 2494168404Spjd 2495236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2496248571Smm leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; 2497168404Spjd 2498185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2499168404Spjd 2500219089Spjd ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; 2501168404Spjd 2502185029Spjd /* 2503219089Spjd * If we have slogs then remove them 1/4 of the time. 2504185029Spjd */ 2505219089Spjd if (spa_has_slogs(spa) && ztest_random(4) == 0) { 2506219089Spjd /* 2507219089Spjd * Grab the guid from the head of the log class rotor. 2508219089Spjd */ 2509219089Spjd guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; 2510185029Spjd 2511219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2512168404Spjd 2513219089Spjd /* 2514219089Spjd * We have to grab the zs_name_lock as writer to 2515219089Spjd * prevent a race between removing a slog (dmu_objset_find) 2516219089Spjd * and destroying a dataset. Removing the slog will 2517219089Spjd * grab a reference on the dataset which may cause 2518219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 2519219089Spjd * leaving the dataset in an inconsistent state. 2520219089Spjd */ 2521236143Smm VERIFY(rw_wrlock(&ztest_name_lock) == 0); 2522219089Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2523236143Smm VERIFY(rw_unlock(&ztest_name_lock) == 0); 2524168404Spjd 2525219089Spjd if (error && error != EEXIST) 2526219089Spjd fatal(0, "spa_vdev_remove() = %d", error); 2527219089Spjd } else { 2528219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2529219089Spjd 2530219089Spjd /* 2531219089Spjd * Make 1/4 of the devices be log devices. 2532219089Spjd */ 2533243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, 2534236143Smm ztest_opts.zo_vdev_size, 0, 2535236143Smm ztest_random(4) == 0, ztest_opts.zo_raidz, 2536236143Smm zs->zs_mirrors, 1); 2537219089Spjd 2538219089Spjd error = spa_vdev_add(spa, nvroot); 2539219089Spjd nvlist_free(nvroot); 2540219089Spjd 2541219089Spjd if (error == ENOSPC) 2542219089Spjd ztest_record_enospc("spa_vdev_add"); 2543219089Spjd else if (error != 0) 2544219089Spjd fatal(0, "spa_vdev_add() = %d", error); 2545219089Spjd } 2546219089Spjd 2547236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2548168404Spjd} 2549168404Spjd 2550185029Spjd/* 2551185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 2552185029Spjd */ 2553219089Spjd/* ARGSUSED */ 2554185029Spjdvoid 2555219089Spjdztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) 2556168404Spjd{ 2557219089Spjd ztest_shared_t *zs = ztest_shared; 2558236143Smm spa_t *spa = ztest_spa; 2559185029Spjd vdev_t *rvd = spa->spa_root_vdev; 2560185029Spjd spa_aux_vdev_t *sav; 2561185029Spjd char *aux; 2562185029Spjd uint64_t guid = 0; 2563185029Spjd int error; 2564168404Spjd 2565185029Spjd if (ztest_random(2) == 0) { 2566185029Spjd sav = &spa->spa_spares; 2567185029Spjd aux = ZPOOL_CONFIG_SPARES; 2568185029Spjd } else { 2569185029Spjd sav = &spa->spa_l2cache; 2570185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 2571185029Spjd } 2572185029Spjd 2573236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2574185029Spjd 2575185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2576185029Spjd 2577185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 2578185029Spjd /* 2579185029Spjd * Pick a random device to remove. 2580185029Spjd */ 2581185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 2582185029Spjd } else { 2583185029Spjd /* 2584185029Spjd * Find an unused device we can add. 2585185029Spjd */ 2586219089Spjd zs->zs_vdev_aux = 0; 2587185029Spjd for (;;) { 2588185029Spjd char path[MAXPATHLEN]; 2589185029Spjd int c; 2590236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2591236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2592236143Smm zs->zs_vdev_aux); 2593185029Spjd for (c = 0; c < sav->sav_count; c++) 2594185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2595185029Spjd path) == 0) 2596185029Spjd break; 2597185029Spjd if (c == sav->sav_count && 2598185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2599185029Spjd break; 2600219089Spjd zs->zs_vdev_aux++; 2601168404Spjd } 2602168404Spjd } 2603168404Spjd 2604185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2605168404Spjd 2606185029Spjd if (guid == 0) { 2607185029Spjd /* 2608185029Spjd * Add a new device. 2609185029Spjd */ 2610243505Smm nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, 2611236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2612185029Spjd error = spa_vdev_add(spa, nvroot); 2613185029Spjd if (error != 0) 2614185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2615185029Spjd nvlist_free(nvroot); 2616185029Spjd } else { 2617185029Spjd /* 2618185029Spjd * Remove an existing device. Sometimes, dirty its 2619185029Spjd * vdev state first to make sure we handle removal 2620185029Spjd * of devices that have pending state changes. 2621185029Spjd */ 2622185029Spjd if (ztest_random(2) == 0) 2623219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2624185029Spjd 2625185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2626185029Spjd if (error != 0 && error != EBUSY) 2627185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2628185029Spjd } 2629185029Spjd 2630236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2631168404Spjd} 2632168404Spjd 2633168404Spjd/* 2634219089Spjd * split a pool if it has mirror tlvdevs 2635219089Spjd */ 2636219089Spjd/* ARGSUSED */ 2637219089Spjdvoid 2638219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2639219089Spjd{ 2640219089Spjd ztest_shared_t *zs = ztest_shared; 2641236143Smm spa_t *spa = ztest_spa; 2642219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2643219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2644219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2645219089Spjd int error = 0; 2646219089Spjd 2647236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2648219089Spjd 2649219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2650236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2651236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2652219089Spjd return; 2653219089Spjd } 2654219089Spjd 2655219089Spjd /* clean up the old pool, if any */ 2656219089Spjd (void) spa_destroy("splitp"); 2657219089Spjd 2658219089Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2659219089Spjd 2660219089Spjd /* generate a config from the existing config */ 2661219089Spjd mutex_enter(&spa->spa_props_lock); 2662219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2663219089Spjd &tree) == 0); 2664219089Spjd mutex_exit(&spa->spa_props_lock); 2665219089Spjd 2666219089Spjd VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2667219089Spjd &children) == 0); 2668219089Spjd 2669219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2670219089Spjd for (c = 0; c < children; c++) { 2671219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2672219089Spjd nvlist_t **mchild; 2673219089Spjd uint_t mchildren; 2674219089Spjd 2675219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2676219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2677219089Spjd 0) == 0); 2678219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2679219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2680219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2681219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2682219089Spjd if (lastlogid == 0) 2683219089Spjd lastlogid = schildren; 2684219089Spjd ++schildren; 2685219089Spjd continue; 2686219089Spjd } 2687219089Spjd lastlogid = 0; 2688219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2689219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2690219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2691219089Spjd } 2692219089Spjd 2693219089Spjd /* OK, create a config that can be used to split */ 2694219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2695219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2696219089Spjd VDEV_TYPE_ROOT) == 0); 2697219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2698219089Spjd lastlogid != 0 ? lastlogid : schildren) == 0); 2699219089Spjd 2700219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2701219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2702219089Spjd 2703219089Spjd for (c = 0; c < schildren; c++) 2704219089Spjd nvlist_free(schild[c]); 2705219089Spjd free(schild); 2706219089Spjd nvlist_free(split); 2707219089Spjd 2708219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2709219089Spjd 2710236143Smm (void) rw_wrlock(&ztest_name_lock); 2711219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2712236143Smm (void) rw_unlock(&ztest_name_lock); 2713219089Spjd 2714219089Spjd nvlist_free(config); 2715219089Spjd 2716219089Spjd if (error == 0) { 2717219089Spjd (void) printf("successful split - results:\n"); 2718219089Spjd mutex_enter(&spa_namespace_lock); 2719219089Spjd show_pool_stats(spa); 2720219089Spjd show_pool_stats(spa_lookup("splitp")); 2721219089Spjd mutex_exit(&spa_namespace_lock); 2722219089Spjd ++zs->zs_splits; 2723219089Spjd --zs->zs_mirrors; 2724219089Spjd } 2725236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2726219089Spjd 2727219089Spjd} 2728219089Spjd 2729219089Spjd/* 2730168404Spjd * Verify that we can attach and detach devices. 2731168404Spjd */ 2732219089Spjd/* ARGSUSED */ 2733168404Spjdvoid 2734219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2735168404Spjd{ 2736219089Spjd ztest_shared_t *zs = ztest_shared; 2737236143Smm spa_t *spa = ztest_spa; 2738185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2739168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2740168404Spjd vdev_t *oldvd, *newvd, *pvd; 2741185029Spjd nvlist_t *root; 2742219089Spjd uint64_t leaves; 2743168404Spjd uint64_t leaf, top; 2744168404Spjd uint64_t ashift = ztest_get_ashift(); 2745209962Smm uint64_t oldguid, pguid; 2746254112Sdelphij uint64_t oldsize, newsize; 2747168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2748168404Spjd int replacing; 2749185029Spjd int oldvd_has_siblings = B_FALSE; 2750185029Spjd int newvd_is_spare = B_FALSE; 2751185029Spjd int oldvd_is_log; 2752168404Spjd int error, expected_error; 2753168404Spjd 2754236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2755236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2756168404Spjd 2757185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2758168404Spjd 2759168404Spjd /* 2760168404Spjd * Decide whether to do an attach or a replace. 2761168404Spjd */ 2762168404Spjd replacing = ztest_random(2); 2763168404Spjd 2764168404Spjd /* 2765168404Spjd * Pick a random top-level vdev. 2766168404Spjd */ 2767219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2768168404Spjd 2769168404Spjd /* 2770168404Spjd * Pick a random leaf within it. 2771168404Spjd */ 2772168404Spjd leaf = ztest_random(leaves); 2773168404Spjd 2774168404Spjd /* 2775185029Spjd * Locate this vdev. 2776168404Spjd */ 2777185029Spjd oldvd = rvd->vdev_child[top]; 2778219089Spjd if (zs->zs_mirrors >= 1) { 2779209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2780219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2781236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2782209962Smm } 2783236143Smm if (ztest_opts.zo_raidz > 1) { 2784209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2785236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2786236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2787209962Smm } 2788168404Spjd 2789168404Spjd /* 2790185029Spjd * If we're already doing an attach or replace, oldvd may be a 2791185029Spjd * mirror vdev -- in which case, pick a random child. 2792168404Spjd */ 2793185029Spjd while (oldvd->vdev_children != 0) { 2794185029Spjd oldvd_has_siblings = B_TRUE; 2795209962Smm ASSERT(oldvd->vdev_children >= 2); 2796209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2797185029Spjd } 2798168404Spjd 2799185029Spjd oldguid = oldvd->vdev_guid; 2800219089Spjd oldsize = vdev_get_min_asize(oldvd); 2801185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2802185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2803185029Spjd pvd = oldvd->vdev_parent; 2804209962Smm pguid = pvd->vdev_guid; 2805185029Spjd 2806168404Spjd /* 2807185029Spjd * If oldvd has siblings, then half of the time, detach it. 2808168404Spjd */ 2809185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2810185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2811209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2812209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2813209962Smm error != ENOTSUP) 2814209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2815236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2816185029Spjd return; 2817185029Spjd } 2818168404Spjd 2819168404Spjd /* 2820185029Spjd * For the new vdev, choose with equal probability between the two 2821185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2822168404Spjd */ 2823185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2824185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2825185029Spjd newvd_is_spare = B_TRUE; 2826185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2827185029Spjd } else { 2828185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2829236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2830236143Smm top * leaves + leaf); 2831185029Spjd if (ztest_random(2) == 0) 2832185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2833185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2834185029Spjd } 2835168404Spjd 2836185029Spjd if (newvd) { 2837219089Spjd newsize = vdev_get_min_asize(newvd); 2838185029Spjd } else { 2839185029Spjd /* 2840185029Spjd * Make newsize a little bigger or smaller than oldsize. 2841185029Spjd * If it's smaller, the attach should fail. 2842185029Spjd * If it's larger, and we're doing a replace, 2843185029Spjd * we should get dynamic LUN growth when we're done. 2844185029Spjd */ 2845185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2846185029Spjd } 2847185029Spjd 2848168404Spjd /* 2849168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2850168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 2851168404Spjd * 2852168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2853168404Spjd * 2854168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2855168404Spjd */ 2856185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2857185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2858185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 2859185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 2860185029Spjd expected_error = ENOTSUP; 2861185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 2862185029Spjd expected_error = ENOTSUP; 2863185029Spjd else if (newvd == oldvd) 2864185029Spjd expected_error = replacing ? 0 : EBUSY; 2865185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 2866168404Spjd expected_error = EBUSY; 2867168404Spjd else if (newsize < oldsize) 2868168404Spjd expected_error = EOVERFLOW; 2869168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 2870168404Spjd expected_error = EDOM; 2871168404Spjd else 2872168404Spjd expected_error = 0; 2873168404Spjd 2874185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2875168404Spjd 2876168404Spjd /* 2877168404Spjd * Build the nvlist describing newpath. 2878168404Spjd */ 2879243505Smm root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, 2880185029Spjd ashift, 0, 0, 0, 1); 2881168404Spjd 2882185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 2883168404Spjd 2884168404Spjd nvlist_free(root); 2885168404Spjd 2886168404Spjd /* 2887168404Spjd * If our parent was the replacing vdev, but the replace completed, 2888168404Spjd * then instead of failing with ENOTSUP we may either succeed, 2889168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 2890168404Spjd */ 2891168404Spjd if (expected_error == ENOTSUP && 2892168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 2893168404Spjd expected_error = error; 2894168404Spjd 2895168404Spjd /* 2896168404Spjd * If someone grew the LUN, the replacement may be too small. 2897168404Spjd */ 2898185029Spjd if (error == EOVERFLOW || error == EBUSY) 2899168404Spjd expected_error = error; 2900168404Spjd 2901185029Spjd /* XXX workaround 6690467 */ 2902185029Spjd if (error != expected_error && expected_error != EBUSY) { 2903185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 2904185029Spjd "returned %d, expected %d", 2905254112Sdelphij oldpath, oldsize, newpath, 2906254112Sdelphij newsize, replacing, error, expected_error); 2907168404Spjd } 2908168404Spjd 2909236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2910168404Spjd} 2911168404Spjd 2912168404Spjd/* 2913219089Spjd * Callback function which expands the physical size of the vdev. 2914168404Spjd */ 2915219089Spjdvdev_t * 2916219089Spjdgrow_vdev(vdev_t *vd, void *arg) 2917168404Spjd{ 2918219089Spjd spa_t *spa = vd->vdev_spa; 2919219089Spjd size_t *newsize = arg; 2920168404Spjd size_t fsize; 2921168404Spjd int fd; 2922168404Spjd 2923219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2924219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2925168404Spjd 2926219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 2927219089Spjd return (vd); 2928219089Spjd 2929219089Spjd fsize = lseek(fd, 0, SEEK_END); 2930219089Spjd (void) ftruncate(fd, *newsize); 2931219089Spjd 2932236143Smm if (ztest_opts.zo_verbose >= 6) { 2933219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 2934219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 2935219089Spjd } 2936219089Spjd (void) close(fd); 2937219089Spjd return (NULL); 2938219089Spjd} 2939219089Spjd 2940219089Spjd/* 2941219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 2942219089Spjd */ 2943219089Spjd/* ARGSUSED */ 2944219089Spjdvdev_t * 2945219089Spjdonline_vdev(vdev_t *vd, void *arg) 2946219089Spjd{ 2947219089Spjd spa_t *spa = vd->vdev_spa; 2948219089Spjd vdev_t *tvd = vd->vdev_top; 2949219089Spjd uint64_t guid = vd->vdev_guid; 2950219089Spjd uint64_t generation = spa->spa_config_generation + 1; 2951219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 2952219089Spjd int error; 2953219089Spjd 2954219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2955219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2956219089Spjd 2957219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 2958219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2959219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 2960219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2961219089Spjd 2962168404Spjd /* 2963219089Spjd * If vdev_online returned an error or the underlying vdev_open 2964219089Spjd * failed then we abort the expand. The only way to know that 2965219089Spjd * vdev_open fails is by checking the returned newstate. 2966168404Spjd */ 2967219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 2968236143Smm if (ztest_opts.zo_verbose >= 5) { 2969219089Spjd (void) printf("Unable to expand vdev, state %llu, " 2970219089Spjd "error %d\n", (u_longlong_t)newstate, error); 2971219089Spjd } 2972219089Spjd return (vd); 2973219089Spjd } 2974219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 2975168404Spjd 2976219089Spjd /* 2977219089Spjd * Since we dropped the lock we need to ensure that we're 2978219089Spjd * still talking to the original vdev. It's possible this 2979219089Spjd * vdev may have been detached/replaced while we were 2980219089Spjd * trying to online it. 2981219089Spjd */ 2982219089Spjd if (generation != spa->spa_config_generation) { 2983236143Smm if (ztest_opts.zo_verbose >= 5) { 2984219089Spjd (void) printf("vdev configuration has changed, " 2985219089Spjd "guid %llu, state %llu, expected gen %llu, " 2986219089Spjd "got gen %llu\n", 2987219089Spjd (u_longlong_t)guid, 2988219089Spjd (u_longlong_t)tvd->vdev_state, 2989219089Spjd (u_longlong_t)generation, 2990219089Spjd (u_longlong_t)spa->spa_config_generation); 2991219089Spjd } 2992219089Spjd return (vd); 2993219089Spjd } 2994219089Spjd return (NULL); 2995219089Spjd} 2996168404Spjd 2997219089Spjd/* 2998219089Spjd * Traverse the vdev tree calling the supplied function. 2999219089Spjd * We continue to walk the tree until we either have walked all 3000219089Spjd * children or we receive a non-NULL return from the callback. 3001219089Spjd * If a NULL callback is passed, then we just return back the first 3002219089Spjd * leaf vdev we encounter. 3003219089Spjd */ 3004219089Spjdvdev_t * 3005219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 3006219089Spjd{ 3007219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 3008219089Spjd if (func == NULL) 3009219089Spjd return (vd); 3010219089Spjd else 3011219089Spjd return (func(vd, arg)); 3012219089Spjd } 3013168404Spjd 3014219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 3015219089Spjd vdev_t *cvd = vd->vdev_child[c]; 3016219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 3017219089Spjd return (cvd); 3018219089Spjd } 3019219089Spjd return (NULL); 3020219089Spjd} 3021219089Spjd 3022219089Spjd/* 3023219089Spjd * Verify that dynamic LUN growth works as expected. 3024219089Spjd */ 3025219089Spjd/* ARGSUSED */ 3026219089Spjdvoid 3027219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 3028219089Spjd{ 3029236143Smm spa_t *spa = ztest_spa; 3030219089Spjd vdev_t *vd, *tvd; 3031219089Spjd metaslab_class_t *mc; 3032219089Spjd metaslab_group_t *mg; 3033219089Spjd size_t psize, newsize; 3034219089Spjd uint64_t top; 3035219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 3036219089Spjd 3037236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 3038219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3039219089Spjd 3040219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 3041219089Spjd 3042219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3043219089Spjd mg = tvd->vdev_mg; 3044219089Spjd mc = mg->mg_class; 3045219089Spjd old_ms_count = tvd->vdev_ms_count; 3046219089Spjd old_class_space = metaslab_class_get_space(mc); 3047219089Spjd 3048219089Spjd /* 3049219089Spjd * Determine the size of the first leaf vdev associated with 3050219089Spjd * our top-level device. 3051219089Spjd */ 3052219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 3053219089Spjd ASSERT3P(vd, !=, NULL); 3054219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3055219089Spjd 3056219089Spjd psize = vd->vdev_psize; 3057219089Spjd 3058219089Spjd /* 3059219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 3060219089Spjd * original size, and it has a valid psize. 3061219089Spjd */ 3062219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 3063236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 3064219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3065236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3066219089Spjd return; 3067219089Spjd } 3068219089Spjd ASSERT(psize > 0); 3069219089Spjd newsize = psize + psize / 8; 3070219089Spjd ASSERT3U(newsize, >, psize); 3071219089Spjd 3072236143Smm if (ztest_opts.zo_verbose >= 6) { 3073219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 3074219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 3075219089Spjd } 3076219089Spjd 3077219089Spjd /* 3078219089Spjd * Growing the vdev is a two step process: 3079219089Spjd * 1). expand the physical size (i.e. relabel) 3080219089Spjd * 2). online the vdev to create the new metaslabs 3081219089Spjd */ 3082219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 3083219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 3084219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 3085236143Smm if (ztest_opts.zo_verbose >= 5) { 3086219089Spjd (void) printf("Could not expand LUN because " 3087219089Spjd "the vdev configuration changed.\n"); 3088168404Spjd } 3089219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3090236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3091219089Spjd return; 3092168404Spjd } 3093168404Spjd 3094219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3095219089Spjd 3096219089Spjd /* 3097219089Spjd * Expanding the LUN will update the config asynchronously, 3098219089Spjd * thus we must wait for the async thread to complete any 3099219089Spjd * pending tasks before proceeding. 3100219089Spjd */ 3101219089Spjd for (;;) { 3102219089Spjd boolean_t done; 3103219089Spjd mutex_enter(&spa->spa_async_lock); 3104219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 3105219089Spjd mutex_exit(&spa->spa_async_lock); 3106219089Spjd if (done) 3107219089Spjd break; 3108219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3109219089Spjd (void) poll(NULL, 0, 100); 3110219089Spjd } 3111219089Spjd 3112219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3113219089Spjd 3114219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3115219089Spjd new_ms_count = tvd->vdev_ms_count; 3116219089Spjd new_class_space = metaslab_class_get_space(mc); 3117219089Spjd 3118219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 3119236143Smm if (ztest_opts.zo_verbose >= 5) { 3120219089Spjd (void) printf("Could not verify LUN expansion due to " 3121219089Spjd "intervening vdev offline or remove.\n"); 3122219089Spjd } 3123219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3124236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3125219089Spjd return; 3126219089Spjd } 3127219089Spjd 3128219089Spjd /* 3129219089Spjd * Make sure we were able to grow the vdev. 3130219089Spjd */ 3131219089Spjd if (new_ms_count <= old_ms_count) 3132219089Spjd fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", 3133219089Spjd old_ms_count, new_ms_count); 3134219089Spjd 3135219089Spjd /* 3136219089Spjd * Make sure we were able to grow the pool. 3137219089Spjd */ 3138219089Spjd if (new_class_space <= old_class_space) 3139219089Spjd fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", 3140219089Spjd old_class_space, new_class_space); 3141219089Spjd 3142236143Smm if (ztest_opts.zo_verbose >= 5) { 3143219089Spjd char oldnumbuf[6], newnumbuf[6]; 3144219089Spjd 3145219089Spjd nicenum(old_class_space, oldnumbuf); 3146219089Spjd nicenum(new_class_space, newnumbuf); 3147219089Spjd (void) printf("%s grew from %s to %s\n", 3148219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 3149219089Spjd } 3150219089Spjd 3151219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3152236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3153168404Spjd} 3154168404Spjd 3155219089Spjd/* 3156219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 3157219089Spjd */ 3158168404Spjd/* ARGSUSED */ 3159168404Spjdstatic void 3160219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3161168404Spjd{ 3162168404Spjd /* 3163219089Spjd * Create the objects common to all ztest datasets. 3164168404Spjd */ 3165219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3166168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3167219089Spjd} 3168168404Spjd 3169219089Spjdstatic int 3170219089Spjdztest_dataset_create(char *dsname) 3171219089Spjd{ 3172219089Spjd uint64_t zilset = ztest_random(100); 3173219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3174219089Spjd ztest_objset_create_cb, NULL); 3175219089Spjd 3176219089Spjd if (err || zilset < 80) 3177219089Spjd return (err); 3178219089Spjd 3179236143Smm if (ztest_opts.zo_verbose >= 6) 3180236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3181219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3182219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3183168404Spjd} 3184168404Spjd 3185219089Spjd/* ARGSUSED */ 3186168404Spjdstatic int 3187219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3188168404Spjd{ 3189168404Spjd objset_t *os; 3190219089Spjd dmu_object_info_t doi; 3191168404Spjd int error; 3192168404Spjd 3193168404Spjd /* 3194168404Spjd * Verify that the dataset contains a directory object. 3195168404Spjd */ 3196248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); 3197219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3198168404Spjd if (error != ENOENT) { 3199168404Spjd /* We could have crashed in the middle of destroying it */ 3200240415Smm ASSERT0(error); 3201219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3202219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3203168404Spjd } 3204248571Smm dmu_objset_disown(os, FTAG); 3205168404Spjd 3206168404Spjd /* 3207168404Spjd * Destroy the dataset. 3208168404Spjd */ 3209248571Smm if (strchr(name, '@') != NULL) { 3210248571Smm VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); 3211248571Smm } else { 3212248571Smm VERIFY0(dsl_destroy_head(name)); 3213248571Smm } 3214168404Spjd return (0); 3215168404Spjd} 3216168404Spjd 3217219089Spjdstatic boolean_t 3218219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3219168404Spjd{ 3220219089Spjd char snapname[MAXNAMELEN]; 3221219089Spjd int error; 3222168404Spjd 3223248571Smm (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); 3224168404Spjd 3225248571Smm error = dmu_objset_snapshot_one(osname, snapname); 3226219089Spjd if (error == ENOSPC) { 3227219089Spjd ztest_record_enospc(FTAG); 3228219089Spjd return (B_FALSE); 3229219089Spjd } 3230248571Smm if (error != 0 && error != EEXIST) { 3231248571Smm fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, 3232248571Smm snapname, error); 3233248571Smm } 3234219089Spjd return (B_TRUE); 3235219089Spjd} 3236168404Spjd 3237219089Spjdstatic boolean_t 3238219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3239219089Spjd{ 3240219089Spjd char snapname[MAXNAMELEN]; 3241219089Spjd int error; 3242219089Spjd 3243219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3244219089Spjd (u_longlong_t)id); 3245219089Spjd 3246248571Smm error = dsl_destroy_snapshot(snapname, B_FALSE); 3247219089Spjd if (error != 0 && error != ENOENT) 3248219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3249219089Spjd return (B_TRUE); 3250168404Spjd} 3251168404Spjd 3252219089Spjd/* ARGSUSED */ 3253168404Spjdvoid 3254219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3255168404Spjd{ 3256219089Spjd ztest_ds_t zdtmp; 3257219089Spjd int iters; 3258168404Spjd int error; 3259185029Spjd objset_t *os, *os2; 3260219089Spjd char name[MAXNAMELEN]; 3261168404Spjd zilog_t *zilog; 3262168404Spjd 3263236143Smm (void) rw_rdlock(&ztest_name_lock); 3264168404Spjd 3265219089Spjd (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", 3266236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 3267168404Spjd 3268168404Spjd /* 3269168404Spjd * If this dataset exists from a previous run, process its replay log 3270168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3271219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3272168404Spjd */ 3273168404Spjd if (ztest_random(2) == 0 && 3274219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3275236143Smm ztest_zd_init(&zdtmp, NULL, os); 3276219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3277219089Spjd ztest_zd_fini(&zdtmp); 3278219089Spjd dmu_objset_disown(os, FTAG); 3279168404Spjd } 3280168404Spjd 3281168404Spjd /* 3282168404Spjd * There may be an old instance of the dataset we're about to 3283168404Spjd * create lying around from a previous run. If so, destroy it 3284168404Spjd * and all of its snapshots. 3285168404Spjd */ 3286219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3287168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3288168404Spjd 3289168404Spjd /* 3290168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3291168404Spjd */ 3292248571Smm VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, 3293248571Smm FTAG, &os)); 3294168404Spjd 3295168404Spjd /* 3296168404Spjd * Verify that we can create a new dataset. 3297168404Spjd */ 3298219089Spjd error = ztest_dataset_create(name); 3299168404Spjd if (error) { 3300168404Spjd if (error == ENOSPC) { 3301219089Spjd ztest_record_enospc(FTAG); 3302236143Smm (void) rw_unlock(&ztest_name_lock); 3303168404Spjd return; 3304168404Spjd } 3305168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3306168404Spjd } 3307168404Spjd 3308248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3309168404Spjd 3310236143Smm ztest_zd_init(&zdtmp, NULL, os); 3311219089Spjd 3312168404Spjd /* 3313168404Spjd * Open the intent log for it. 3314168404Spjd */ 3315219089Spjd zilog = zil_open(os, ztest_get_data); 3316168404Spjd 3317168404Spjd /* 3318219089Spjd * Put some objects in there, do a little I/O to them, 3319219089Spjd * and randomly take a couple of snapshots along the way. 3320168404Spjd */ 3321219089Spjd iters = ztest_random(5); 3322219089Spjd for (int i = 0; i < iters; i++) { 3323219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3324219089Spjd if (ztest_random(iters) == 0) 3325219089Spjd (void) ztest_snapshot_create(name, i); 3326168404Spjd } 3327168404Spjd 3328168404Spjd /* 3329168404Spjd * Verify that we cannot create an existing dataset. 3330168404Spjd */ 3331219089Spjd VERIFY3U(EEXIST, ==, 3332219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3333168404Spjd 3334168404Spjd /* 3335219089Spjd * Verify that we can hold an objset that is also owned. 3336168404Spjd */ 3337219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3338219089Spjd dmu_objset_rele(os2, FTAG); 3339168404Spjd 3340219089Spjd /* 3341219089Spjd * Verify that we cannot own an objset that is already owned. 3342219089Spjd */ 3343219089Spjd VERIFY3U(EBUSY, ==, 3344219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3345219089Spjd 3346168404Spjd zil_close(zilog); 3347219089Spjd dmu_objset_disown(os, FTAG); 3348219089Spjd ztest_zd_fini(&zdtmp); 3349168404Spjd 3350236143Smm (void) rw_unlock(&ztest_name_lock); 3351168404Spjd} 3352168404Spjd 3353168404Spjd/* 3354168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 3355168404Spjd */ 3356168404Spjdvoid 3357219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3358168404Spjd{ 3359236143Smm (void) rw_rdlock(&ztest_name_lock); 3360219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3361219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3362236143Smm (void) rw_unlock(&ztest_name_lock); 3363219089Spjd} 3364219089Spjd 3365219089Spjd/* 3366219089Spjd * Cleanup non-standard snapshots and clones. 3367219089Spjd */ 3368219089Spjdvoid 3369219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3370219089Spjd{ 3371219089Spjd char snap1name[MAXNAMELEN]; 3372219089Spjd char clone1name[MAXNAMELEN]; 3373219089Spjd char snap2name[MAXNAMELEN]; 3374219089Spjd char clone2name[MAXNAMELEN]; 3375219089Spjd char snap3name[MAXNAMELEN]; 3376168404Spjd int error; 3377168404Spjd 3378219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3379219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3380219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3381219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3382219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3383168404Spjd 3384248571Smm error = dsl_destroy_head(clone2name); 3385219089Spjd if (error && error != ENOENT) 3386248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); 3387248571Smm error = dsl_destroy_snapshot(snap3name, B_FALSE); 3388219089Spjd if (error && error != ENOENT) 3389248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); 3390248571Smm error = dsl_destroy_snapshot(snap2name, B_FALSE); 3391219089Spjd if (error && error != ENOENT) 3392248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); 3393248571Smm error = dsl_destroy_head(clone1name); 3394219089Spjd if (error && error != ENOENT) 3395248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); 3396248571Smm error = dsl_destroy_snapshot(snap1name, B_FALSE); 3397219089Spjd if (error && error != ENOENT) 3398248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); 3399168404Spjd} 3400168404Spjd 3401168404Spjd/* 3402207910Smm * Verify dsl_dataset_promote handles EBUSY 3403207910Smm */ 3404207910Smmvoid 3405219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3406207910Smm{ 3407248571Smm objset_t *os; 3408219089Spjd char snap1name[MAXNAMELEN]; 3409219089Spjd char clone1name[MAXNAMELEN]; 3410219089Spjd char snap2name[MAXNAMELEN]; 3411219089Spjd char clone2name[MAXNAMELEN]; 3412219089Spjd char snap3name[MAXNAMELEN]; 3413219089Spjd char *osname = zd->zd_name; 3414219089Spjd int error; 3415207910Smm 3416236143Smm (void) rw_rdlock(&ztest_name_lock); 3417207910Smm 3418219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3419207910Smm 3420219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3421219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3422219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3423219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3424219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3425207910Smm 3426248571Smm error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); 3427209962Smm if (error && error != EEXIST) { 3428209962Smm if (error == ENOSPC) { 3429209962Smm ztest_record_enospc(FTAG); 3430209962Smm goto out; 3431209962Smm } 3432209962Smm fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3433209962Smm } 3434207910Smm 3435248571Smm error = dmu_objset_clone(clone1name, snap1name); 3436209962Smm if (error) { 3437209962Smm if (error == ENOSPC) { 3438209962Smm ztest_record_enospc(FTAG); 3439209962Smm goto out; 3440209962Smm } 3441207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3442209962Smm } 3443207910Smm 3444248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); 3445209962Smm if (error && error != EEXIST) { 3446209962Smm if (error == ENOSPC) { 3447209962Smm ztest_record_enospc(FTAG); 3448209962Smm goto out; 3449209962Smm } 3450209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3451209962Smm } 3452207910Smm 3453248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); 3454209962Smm if (error && error != EEXIST) { 3455209962Smm if (error == ENOSPC) { 3456209962Smm ztest_record_enospc(FTAG); 3457209962Smm goto out; 3458209962Smm } 3459209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3460209962Smm } 3461207910Smm 3462248571Smm error = dmu_objset_clone(clone2name, snap3name); 3463209962Smm if (error) { 3464209962Smm if (error == ENOSPC) { 3465219089Spjd ztest_record_enospc(FTAG); 3466209962Smm goto out; 3467209962Smm } 3468207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3469209962Smm } 3470207910Smm 3471248571Smm error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); 3472207910Smm if (error) 3473248571Smm fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); 3474219089Spjd error = dsl_dataset_promote(clone2name, NULL); 3475207910Smm if (error != EBUSY) 3476207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3477207910Smm error); 3478248571Smm dmu_objset_disown(os, FTAG); 3479207910Smm 3480209962Smmout: 3481219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3482207910Smm 3483236143Smm (void) rw_unlock(&ztest_name_lock); 3484207910Smm} 3485207910Smm 3486207910Smm/* 3487168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3488168404Spjd */ 3489168404Spjdvoid 3490219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3491168404Spjd{ 3492219089Spjd ztest_od_t od[4]; 3493219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3494168404Spjd 3495219089Spjd for (int b = 0; b < batchsize; b++) 3496219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3497168404Spjd 3498168404Spjd /* 3499219089Spjd * Destroy the previous batch of objects, create a new batch, 3500219089Spjd * and do some I/O on the new objects. 3501168404Spjd */ 3502219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3503219089Spjd return; 3504168404Spjd 3505219089Spjd while (ztest_random(4 * batchsize) != 0) 3506219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3507219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3508168404Spjd} 3509168404Spjd 3510168404Spjd/* 3511168404Spjd * Verify that dmu_{read,write} work as expected. 3512168404Spjd */ 3513168404Spjdvoid 3514219089Spjdztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) 3515168404Spjd{ 3516219089Spjd objset_t *os = zd->zd_os; 3517219089Spjd ztest_od_t od[2]; 3518168404Spjd dmu_tx_t *tx; 3519168404Spjd int i, freeit, error; 3520168404Spjd uint64_t n, s, txg; 3521168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 3522219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3523219089Spjd uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); 3524168404Spjd uint64_t regions = 997; 3525168404Spjd uint64_t stride = 123456789ULL; 3526168404Spjd uint64_t width = 40; 3527168404Spjd int free_percent = 5; 3528168404Spjd 3529168404Spjd /* 3530168404Spjd * This test uses two objects, packobj and bigobj, that are always 3531168404Spjd * updated together (i.e. in the same tx) so that their contents are 3532168404Spjd * in sync and can be compared. Their contents relate to each other 3533168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 3534168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 3535168404Spjd * for any index n, there are three bufwads that should be identical: 3536168404Spjd * 3537168404Spjd * packobj, at offset n * sizeof (bufwad_t) 3538168404Spjd * bigobj, at the head of the nth chunk 3539168404Spjd * bigobj, at the tail of the nth chunk 3540168404Spjd * 3541168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 3542168404Spjd * and it doesn't have any relation to the object blocksize. 3543168404Spjd * The only requirement is that it can hold at least two bufwads. 3544168404Spjd * 3545168404Spjd * Normally, we write the bufwad to each of these locations. 3546168404Spjd * However, free_percent of the time we instead write zeroes to 3547168404Spjd * packobj and perform a dmu_free_range() on bigobj. By comparing 3548168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 3549168404Spjd * tracking which parts of an object are allocated and free, 3550168404Spjd * and that the contents of the allocated blocks are correct. 3551168404Spjd */ 3552168404Spjd 3553168404Spjd /* 3554168404Spjd * Read the directory info. If it's the first time, set things up. 3555168404Spjd */ 3556219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); 3557219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3558168404Spjd 3559219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3560219089Spjd return; 3561168404Spjd 3562219089Spjd bigobj = od[0].od_object; 3563219089Spjd packobj = od[1].od_object; 3564219089Spjd chunksize = od[0].od_gen; 3565219089Spjd ASSERT(chunksize == od[1].od_gen); 3566168404Spjd 3567168404Spjd /* 3568168404Spjd * Prefetch a random chunk of the big object. 3569168404Spjd * Our aim here is to get some async reads in flight 3570168404Spjd * for blocks that we may free below; the DMU should 3571168404Spjd * handle this race correctly. 3572168404Spjd */ 3573168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3574168404Spjd s = 1 + ztest_random(2 * width - 1); 3575219089Spjd dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); 3576168404Spjd 3577168404Spjd /* 3578168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 3579168404Spjd */ 3580168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3581168404Spjd s = 1 + ztest_random(width - 1); 3582168404Spjd 3583168404Spjd packoff = n * sizeof (bufwad_t); 3584168404Spjd packsize = s * sizeof (bufwad_t); 3585168404Spjd 3586219089Spjd bigoff = n * chunksize; 3587219089Spjd bigsize = s * chunksize; 3588168404Spjd 3589168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 3590168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 3591168404Spjd 3592168404Spjd /* 3593168404Spjd * free_percent of the time, free a range of bigobj rather than 3594168404Spjd * overwriting it. 3595168404Spjd */ 3596168404Spjd freeit = (ztest_random(100) < free_percent); 3597168404Spjd 3598168404Spjd /* 3599168404Spjd * Read the current contents of our objects. 3600168404Spjd */ 3601219089Spjd error = dmu_read(os, packobj, packoff, packsize, packbuf, 3602209962Smm DMU_READ_PREFETCH); 3603240415Smm ASSERT0(error); 3604219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, 3605209962Smm DMU_READ_PREFETCH); 3606240415Smm ASSERT0(error); 3607168404Spjd 3608168404Spjd /* 3609168404Spjd * Get a tx for the mods to both packobj and bigobj. 3610168404Spjd */ 3611168404Spjd tx = dmu_tx_create(os); 3612168404Spjd 3613219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3614168404Spjd 3615168404Spjd if (freeit) 3616219089Spjd dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); 3617168404Spjd else 3618219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3619168404Spjd 3620254077Sdelphij /* This accounts for setting the checksum/compression. */ 3621254077Sdelphij dmu_tx_hold_bonus(tx, bigobj); 3622254077Sdelphij 3623219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3624219089Spjd if (txg == 0) { 3625168404Spjd umem_free(packbuf, packsize); 3626168404Spjd umem_free(bigbuf, bigsize); 3627168404Spjd return; 3628168404Spjd } 3629168404Spjd 3630219089Spjd dmu_object_set_checksum(os, bigobj, 3631219089Spjd (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); 3632168404Spjd 3633219089Spjd dmu_object_set_compress(os, bigobj, 3634219089Spjd (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); 3635219089Spjd 3636168404Spjd /* 3637168404Spjd * For each index from n to n + s, verify that the existing bufwad 3638168404Spjd * in packobj matches the bufwads at the head and tail of the 3639168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 3640168404Spjd * with the new values we want to write out. 3641168404Spjd */ 3642168404Spjd for (i = 0; i < s; i++) { 3643168404Spjd /* LINTED */ 3644168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3645168404Spjd /* LINTED */ 3646219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3647168404Spjd /* LINTED */ 3648219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3649168404Spjd 3650168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3651168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3652168404Spjd 3653168404Spjd if (pack->bw_txg > txg) 3654168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 3655168404Spjd pack->bw_txg, txg); 3656168404Spjd 3657168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 3658168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3659168404Spjd pack->bw_index, n, i); 3660168404Spjd 3661168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3662168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3663168404Spjd 3664168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3665168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3666168404Spjd 3667168404Spjd if (freeit) { 3668168404Spjd bzero(pack, sizeof (bufwad_t)); 3669168404Spjd } else { 3670168404Spjd pack->bw_index = n + i; 3671168404Spjd pack->bw_txg = txg; 3672168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 3673168404Spjd } 3674168404Spjd *bigH = *pack; 3675168404Spjd *bigT = *pack; 3676168404Spjd } 3677168404Spjd 3678168404Spjd /* 3679168404Spjd * We've verified all the old bufwads, and made new ones. 3680168404Spjd * Now write them out. 3681168404Spjd */ 3682219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3683168404Spjd 3684168404Spjd if (freeit) { 3685236143Smm if (ztest_opts.zo_verbose >= 7) { 3686168404Spjd (void) printf("freeing offset %llx size %llx" 3687168404Spjd " txg %llx\n", 3688168404Spjd (u_longlong_t)bigoff, 3689168404Spjd (u_longlong_t)bigsize, 3690168404Spjd (u_longlong_t)txg); 3691168404Spjd } 3692219089Spjd VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); 3693168404Spjd } else { 3694236143Smm if (ztest_opts.zo_verbose >= 7) { 3695168404Spjd (void) printf("writing offset %llx size %llx" 3696168404Spjd " txg %llx\n", 3697168404Spjd (u_longlong_t)bigoff, 3698168404Spjd (u_longlong_t)bigsize, 3699168404Spjd (u_longlong_t)txg); 3700168404Spjd } 3701219089Spjd dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); 3702168404Spjd } 3703168404Spjd 3704168404Spjd dmu_tx_commit(tx); 3705168404Spjd 3706168404Spjd /* 3707168404Spjd * Sanity check the stuff we just wrote. 3708168404Spjd */ 3709168404Spjd { 3710168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3711168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3712168404Spjd 3713219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3714209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3715219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3716209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3717168404Spjd 3718168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3719168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3720168404Spjd 3721168404Spjd umem_free(packcheck, packsize); 3722168404Spjd umem_free(bigcheck, bigsize); 3723168404Spjd } 3724168404Spjd 3725168404Spjd umem_free(packbuf, packsize); 3726168404Spjd umem_free(bigbuf, bigsize); 3727168404Spjd} 3728168404Spjd 3729168404Spjdvoid 3730209962Smmcompare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, 3731219089Spjd uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) 3732209962Smm{ 3733209962Smm uint64_t i; 3734209962Smm bufwad_t *pack; 3735209962Smm bufwad_t *bigH; 3736209962Smm bufwad_t *bigT; 3737209962Smm 3738209962Smm /* 3739209962Smm * For each index from n to n + s, verify that the existing bufwad 3740209962Smm * in packobj matches the bufwads at the head and tail of the 3741209962Smm * corresponding chunk in bigobj. Then update all three bufwads 3742209962Smm * with the new values we want to write out. 3743209962Smm */ 3744209962Smm for (i = 0; i < s; i++) { 3745209962Smm /* LINTED */ 3746209962Smm pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3747209962Smm /* LINTED */ 3748219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3749209962Smm /* LINTED */ 3750219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3751209962Smm 3752209962Smm ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3753209962Smm ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3754209962Smm 3755209962Smm if (pack->bw_txg > txg) 3756209962Smm fatal(0, "future leak: got %llx, open txg is %llx", 3757209962Smm pack->bw_txg, txg); 3758209962Smm 3759209962Smm if (pack->bw_data != 0 && pack->bw_index != n + i) 3760209962Smm fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3761209962Smm pack->bw_index, n, i); 3762209962Smm 3763209962Smm if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3764209962Smm fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3765209962Smm 3766209962Smm if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3767209962Smm fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3768209962Smm 3769209962Smm pack->bw_index = n + i; 3770209962Smm pack->bw_txg = txg; 3771209962Smm pack->bw_data = 1 + ztest_random(-2ULL); 3772209962Smm 3773209962Smm *bigH = *pack; 3774209962Smm *bigT = *pack; 3775209962Smm } 3776209962Smm} 3777209962Smm 3778209962Smmvoid 3779219089Spjdztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) 3780209962Smm{ 3781219089Spjd objset_t *os = zd->zd_os; 3782219089Spjd ztest_od_t od[2]; 3783209962Smm dmu_tx_t *tx; 3784209962Smm uint64_t i; 3785209962Smm int error; 3786209962Smm uint64_t n, s, txg; 3787209962Smm bufwad_t *packbuf, *bigbuf; 3788219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3789219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3790219089Spjd uint64_t chunksize = blocksize; 3791209962Smm uint64_t regions = 997; 3792209962Smm uint64_t stride = 123456789ULL; 3793209962Smm uint64_t width = 9; 3794209962Smm dmu_buf_t *bonus_db; 3795209962Smm arc_buf_t **bigbuf_arcbufs; 3796219089Spjd dmu_object_info_t doi; 3797209962Smm 3798209962Smm /* 3799209962Smm * This test uses two objects, packobj and bigobj, that are always 3800209962Smm * updated together (i.e. in the same tx) so that their contents are 3801209962Smm * in sync and can be compared. Their contents relate to each other 3802209962Smm * in a simple way: packobj is a dense array of 'bufwad' structures, 3803209962Smm * while bigobj is a sparse array of the same bufwads. Specifically, 3804209962Smm * for any index n, there are three bufwads that should be identical: 3805209962Smm * 3806209962Smm * packobj, at offset n * sizeof (bufwad_t) 3807209962Smm * bigobj, at the head of the nth chunk 3808209962Smm * bigobj, at the tail of the nth chunk 3809209962Smm * 3810209962Smm * The chunk size is set equal to bigobj block size so that 3811209962Smm * dmu_assign_arcbuf() can be tested for object updates. 3812209962Smm */ 3813209962Smm 3814209962Smm /* 3815209962Smm * Read the directory info. If it's the first time, set things up. 3816209962Smm */ 3817219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3818219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3819209962Smm 3820219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3821219089Spjd return; 3822209962Smm 3823219089Spjd bigobj = od[0].od_object; 3824219089Spjd packobj = od[1].od_object; 3825219089Spjd blocksize = od[0].od_blocksize; 3826219089Spjd chunksize = blocksize; 3827219089Spjd ASSERT(chunksize == od[1].od_gen); 3828209962Smm 3829219089Spjd VERIFY(dmu_object_info(os, bigobj, &doi) == 0); 3830219089Spjd VERIFY(ISP2(doi.doi_data_block_size)); 3831219089Spjd VERIFY(chunksize == doi.doi_data_block_size); 3832219089Spjd VERIFY(chunksize >= 2 * sizeof (bufwad_t)); 3833209962Smm 3834209962Smm /* 3835209962Smm * Pick a random index and compute the offsets into packobj and bigobj. 3836209962Smm */ 3837209962Smm n = ztest_random(regions) * stride + ztest_random(width); 3838209962Smm s = 1 + ztest_random(width - 1); 3839209962Smm 3840209962Smm packoff = n * sizeof (bufwad_t); 3841209962Smm packsize = s * sizeof (bufwad_t); 3842209962Smm 3843219089Spjd bigoff = n * chunksize; 3844219089Spjd bigsize = s * chunksize; 3845209962Smm 3846209962Smm packbuf = umem_zalloc(packsize, UMEM_NOFAIL); 3847209962Smm bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); 3848209962Smm 3849219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); 3850209962Smm 3851209962Smm bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); 3852209962Smm 3853209962Smm /* 3854209962Smm * Iteration 0 test zcopy for DB_UNCACHED dbufs. 3855209962Smm * Iteration 1 test zcopy to already referenced dbufs. 3856209962Smm * Iteration 2 test zcopy to dirty dbuf in the same txg. 3857209962Smm * Iteration 3 test zcopy to dbuf dirty in previous txg. 3858209962Smm * Iteration 4 test zcopy when dbuf is no longer dirty. 3859209962Smm * Iteration 5 test zcopy when it can't be done. 3860209962Smm * Iteration 6 one more zcopy write. 3861209962Smm */ 3862209962Smm for (i = 0; i < 7; i++) { 3863209962Smm uint64_t j; 3864209962Smm uint64_t off; 3865209962Smm 3866209962Smm /* 3867209962Smm * In iteration 5 (i == 5) use arcbufs 3868209962Smm * that don't match bigobj blksz to test 3869209962Smm * dmu_assign_arcbuf() when it can't directly 3870209962Smm * assign an arcbuf to a dbuf. 3871209962Smm */ 3872209962Smm for (j = 0; j < s; j++) { 3873209962Smm if (i != 5) { 3874209962Smm bigbuf_arcbufs[j] = 3875219089Spjd dmu_request_arcbuf(bonus_db, chunksize); 3876209962Smm } else { 3877209962Smm bigbuf_arcbufs[2 * j] = 3878219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3879209962Smm bigbuf_arcbufs[2 * j + 1] = 3880219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3881209962Smm } 3882209962Smm } 3883209962Smm 3884209962Smm /* 3885209962Smm * Get a tx for the mods to both packobj and bigobj. 3886209962Smm */ 3887209962Smm tx = dmu_tx_create(os); 3888209962Smm 3889219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3890219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3891209962Smm 3892219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3893219089Spjd if (txg == 0) { 3894209962Smm umem_free(packbuf, packsize); 3895209962Smm umem_free(bigbuf, bigsize); 3896209962Smm for (j = 0; j < s; j++) { 3897209962Smm if (i != 5) { 3898209962Smm dmu_return_arcbuf(bigbuf_arcbufs[j]); 3899209962Smm } else { 3900209962Smm dmu_return_arcbuf( 3901209962Smm bigbuf_arcbufs[2 * j]); 3902209962Smm dmu_return_arcbuf( 3903209962Smm bigbuf_arcbufs[2 * j + 1]); 3904209962Smm } 3905209962Smm } 3906209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3907209962Smm dmu_buf_rele(bonus_db, FTAG); 3908209962Smm return; 3909209962Smm } 3910209962Smm 3911209962Smm /* 3912209962Smm * 50% of the time don't read objects in the 1st iteration to 3913209962Smm * test dmu_assign_arcbuf() for the case when there're no 3914209962Smm * existing dbufs for the specified offsets. 3915209962Smm */ 3916209962Smm if (i != 0 || ztest_random(2) != 0) { 3917219089Spjd error = dmu_read(os, packobj, packoff, 3918209962Smm packsize, packbuf, DMU_READ_PREFETCH); 3919240415Smm ASSERT0(error); 3920219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, 3921209962Smm bigbuf, DMU_READ_PREFETCH); 3922240415Smm ASSERT0(error); 3923209962Smm } 3924209962Smm compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, 3925219089Spjd n, chunksize, txg); 3926209962Smm 3927209962Smm /* 3928209962Smm * We've verified all the old bufwads, and made new ones. 3929209962Smm * Now write them out. 3930209962Smm */ 3931219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3932236143Smm if (ztest_opts.zo_verbose >= 7) { 3933209962Smm (void) printf("writing offset %llx size %llx" 3934209962Smm " txg %llx\n", 3935209962Smm (u_longlong_t)bigoff, 3936209962Smm (u_longlong_t)bigsize, 3937209962Smm (u_longlong_t)txg); 3938209962Smm } 3939219089Spjd for (off = bigoff, j = 0; j < s; j++, off += chunksize) { 3940209962Smm dmu_buf_t *dbt; 3941209962Smm if (i != 5) { 3942209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3943219089Spjd bigbuf_arcbufs[j]->b_data, chunksize); 3944209962Smm } else { 3945209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3946209962Smm bigbuf_arcbufs[2 * j]->b_data, 3947219089Spjd chunksize / 2); 3948209962Smm bcopy((caddr_t)bigbuf + (off - bigoff) + 3949219089Spjd chunksize / 2, 3950209962Smm bigbuf_arcbufs[2 * j + 1]->b_data, 3951219089Spjd chunksize / 2); 3952209962Smm } 3953209962Smm 3954209962Smm if (i == 1) { 3955219089Spjd VERIFY(dmu_buf_hold(os, bigobj, off, 3956219089Spjd FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); 3957209962Smm } 3958209962Smm if (i != 5) { 3959209962Smm dmu_assign_arcbuf(bonus_db, off, 3960209962Smm bigbuf_arcbufs[j], tx); 3961209962Smm } else { 3962209962Smm dmu_assign_arcbuf(bonus_db, off, 3963209962Smm bigbuf_arcbufs[2 * j], tx); 3964209962Smm dmu_assign_arcbuf(bonus_db, 3965219089Spjd off + chunksize / 2, 3966209962Smm bigbuf_arcbufs[2 * j + 1], tx); 3967209962Smm } 3968209962Smm if (i == 1) { 3969209962Smm dmu_buf_rele(dbt, FTAG); 3970209962Smm } 3971209962Smm } 3972209962Smm dmu_tx_commit(tx); 3973209962Smm 3974209962Smm /* 3975209962Smm * Sanity check the stuff we just wrote. 3976209962Smm */ 3977209962Smm { 3978209962Smm void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3979209962Smm void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3980209962Smm 3981219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3982209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3983219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3984209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3985209962Smm 3986209962Smm ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3987209962Smm ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3988209962Smm 3989209962Smm umem_free(packcheck, packsize); 3990209962Smm umem_free(bigcheck, bigsize); 3991209962Smm } 3992209962Smm if (i == 2) { 3993209962Smm txg_wait_open(dmu_objset_pool(os), 0); 3994209962Smm } else if (i == 3) { 3995209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 3996209962Smm } 3997209962Smm } 3998209962Smm 3999209962Smm dmu_buf_rele(bonus_db, FTAG); 4000209962Smm umem_free(packbuf, packsize); 4001209962Smm umem_free(bigbuf, bigsize); 4002209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 4003209962Smm} 4004209962Smm 4005219089Spjd/* ARGSUSED */ 4006209962Smmvoid 4007219089Spjdztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) 4008168404Spjd{ 4009219089Spjd ztest_od_t od[1]; 4010219089Spjd uint64_t offset = (1ULL << (ztest_random(20) + 43)) + 4011219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4012168404Spjd 4013168404Spjd /* 4014219089Spjd * Have multiple threads write to large offsets in an object 4015219089Spjd * to verify that parallel writes to an object -- even to the 4016219089Spjd * same blocks within the object -- doesn't cause any trouble. 4017168404Spjd */ 4018219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4019219089Spjd 4020219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4021219089Spjd return; 4022219089Spjd 4023219089Spjd while (ztest_random(10) != 0) 4024219089Spjd ztest_io(zd, od[0].od_object, offset); 4025168404Spjd} 4026168404Spjd 4027168404Spjdvoid 4028219089Spjdztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) 4029168404Spjd{ 4030219089Spjd ztest_od_t od[1]; 4031219089Spjd uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + 4032219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4033219089Spjd uint64_t count = ztest_random(20) + 1; 4034219089Spjd uint64_t blocksize = ztest_random_blocksize(); 4035219089Spjd void *data; 4036168404Spjd 4037219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4038168404Spjd 4039219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4040185029Spjd return; 4041168404Spjd 4042219089Spjd if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) 4043185029Spjd return; 4044168404Spjd 4045219089Spjd ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); 4046185029Spjd 4047219089Spjd data = umem_zalloc(blocksize, UMEM_NOFAIL); 4048185029Spjd 4049219089Spjd while (ztest_random(count) != 0) { 4050219089Spjd uint64_t randoff = offset + (ztest_random(count) * blocksize); 4051219089Spjd if (ztest_write(zd, od[0].od_object, randoff, blocksize, 4052219089Spjd data) != 0) 4053219089Spjd break; 4054219089Spjd while (ztest_random(4) != 0) 4055219089Spjd ztest_io(zd, od[0].od_object, randoff); 4056185029Spjd } 4057168404Spjd 4058219089Spjd umem_free(data, blocksize); 4059168404Spjd} 4060168404Spjd 4061168404Spjd/* 4062168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 4063168404Spjd */ 4064168404Spjd#define ZTEST_ZAP_MIN_INTS 1 4065168404Spjd#define ZTEST_ZAP_MAX_INTS 4 4066168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 4067168404Spjd 4068168404Spjdvoid 4069219089Spjdztest_zap(ztest_ds_t *zd, uint64_t id) 4070168404Spjd{ 4071219089Spjd objset_t *os = zd->zd_os; 4072219089Spjd ztest_od_t od[1]; 4073168404Spjd uint64_t object; 4074168404Spjd uint64_t txg, last_txg; 4075168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 4076168404Spjd uint64_t zl_ints, zl_intsize, prop; 4077168404Spjd int i, ints; 4078168404Spjd dmu_tx_t *tx; 4079168404Spjd char propname[100], txgname[100]; 4080168404Spjd int error; 4081168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 4082168404Spjd 4083219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4084168404Spjd 4085219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4086219089Spjd return; 4087219089Spjd 4088219089Spjd object = od[0].od_object; 4089219089Spjd 4090168404Spjd /* 4091219089Spjd * Generate a known hash collision, and verify that 4092219089Spjd * we can lookup and remove both entries. 4093168404Spjd */ 4094219089Spjd tx = dmu_tx_create(os); 4095219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4096219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4097219089Spjd if (txg == 0) 4098219089Spjd return; 4099219089Spjd for (i = 0; i < 2; i++) { 4100219089Spjd value[i] = i; 4101219089Spjd VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 4102219089Spjd 1, &value[i], tx)); 4103168404Spjd } 4104219089Spjd for (i = 0; i < 2; i++) { 4105219089Spjd VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], 4106219089Spjd sizeof (uint64_t), 1, &value[i], tx)); 4107219089Spjd VERIFY3U(0, ==, 4108219089Spjd zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); 4109219089Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4110219089Spjd ASSERT3U(zl_ints, ==, 1); 4111219089Spjd } 4112219089Spjd for (i = 0; i < 2; i++) { 4113219089Spjd VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); 4114219089Spjd } 4115219089Spjd dmu_tx_commit(tx); 4116168404Spjd 4117219089Spjd /* 4118219089Spjd * Generate a buch of random entries. 4119219089Spjd */ 4120168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 4121168404Spjd 4122185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4123185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4124185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4125185029Spjd bzero(value, sizeof (value)); 4126185029Spjd last_txg = 0; 4127168404Spjd 4128185029Spjd /* 4129185029Spjd * If these zap entries already exist, validate their contents. 4130185029Spjd */ 4131185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4132185029Spjd if (error == 0) { 4133185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4134185029Spjd ASSERT3U(zl_ints, ==, 1); 4135168404Spjd 4136185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 4137185029Spjd zl_ints, &last_txg) == 0); 4138168404Spjd 4139185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 4140185029Spjd &zl_ints) == 0); 4141168404Spjd 4142185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4143185029Spjd ASSERT3U(zl_ints, ==, ints); 4144168404Spjd 4145185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 4146185029Spjd zl_ints, value) == 0); 4147168404Spjd 4148185029Spjd for (i = 0; i < ints; i++) { 4149185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 4150168404Spjd } 4151185029Spjd } else { 4152185029Spjd ASSERT3U(error, ==, ENOENT); 4153185029Spjd } 4154168404Spjd 4155185029Spjd /* 4156185029Spjd * Atomically update two entries in our zap object. 4157185029Spjd * The first is named txg_%llu, and contains the txg 4158185029Spjd * in which the property was last updated. The second 4159185029Spjd * is named prop_%llu, and the nth element of its value 4160185029Spjd * should be txg + object + n. 4161185029Spjd */ 4162185029Spjd tx = dmu_tx_create(os); 4163219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4164219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4165219089Spjd if (txg == 0) 4166185029Spjd return; 4167168404Spjd 4168185029Spjd if (last_txg > txg) 4169185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 4170168404Spjd 4171185029Spjd for (i = 0; i < ints; i++) 4172185029Spjd value[i] = txg + object + i; 4173168404Spjd 4174219089Spjd VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 4175219089Spjd 1, &txg, tx)); 4176219089Spjd VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), 4177219089Spjd ints, value, tx)); 4178168404Spjd 4179185029Spjd dmu_tx_commit(tx); 4180168404Spjd 4181185029Spjd /* 4182185029Spjd * Remove a random pair of entries. 4183185029Spjd */ 4184185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4185185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4186185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4187168404Spjd 4188185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4189168404Spjd 4190185029Spjd if (error == ENOENT) 4191185029Spjd return; 4192168404Spjd 4193240415Smm ASSERT0(error); 4194168404Spjd 4195185029Spjd tx = dmu_tx_create(os); 4196219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4197219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4198219089Spjd if (txg == 0) 4199185029Spjd return; 4200219089Spjd VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); 4201219089Spjd VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); 4202185029Spjd dmu_tx_commit(tx); 4203168404Spjd} 4204168404Spjd 4205209962Smm/* 4206209962Smm * Testcase to test the upgrading of a microzap to fatzap. 4207209962Smm */ 4208168404Spjdvoid 4209219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4210209962Smm{ 4211219089Spjd objset_t *os = zd->zd_os; 4212219089Spjd ztest_od_t od[1]; 4213219089Spjd uint64_t object, txg; 4214209962Smm 4215219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4216209962Smm 4217219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4218219089Spjd return; 4219209962Smm 4220219089Spjd object = od[0].od_object; 4221209962Smm 4222209962Smm /* 4223219089Spjd * Add entries to this ZAP and make sure it spills over 4224209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4225219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4226209962Smm */ 4227219089Spjd for (int i = 0; i < 2050; i++) { 4228219089Spjd char name[MAXNAMELEN]; 4229219089Spjd uint64_t value = i; 4230219089Spjd dmu_tx_t *tx; 4231219089Spjd int error; 4232209962Smm 4233219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4234219089Spjd id, value); 4235219089Spjd 4236209962Smm tx = dmu_tx_create(os); 4237219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4238219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4239219089Spjd if (txg == 0) 4240209962Smm return; 4241219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4242219089Spjd &value, tx); 4243209962Smm ASSERT(error == 0 || error == EEXIST); 4244209962Smm dmu_tx_commit(tx); 4245209962Smm } 4246209962Smm} 4247209962Smm 4248219089Spjd/* ARGSUSED */ 4249209962Smmvoid 4250219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4251168404Spjd{ 4252219089Spjd objset_t *os = zd->zd_os; 4253219089Spjd ztest_od_t od[1]; 4254168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4255168404Spjd dmu_tx_t *tx; 4256168404Spjd int i, namelen, error; 4257219089Spjd int micro = ztest_random(2); 4258168404Spjd char name[20], string_value[20]; 4259168404Spjd void *data; 4260168404Spjd 4261219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4262219089Spjd 4263219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4264219089Spjd return; 4265219089Spjd 4266219089Spjd object = od[0].od_object; 4267219089Spjd 4268185029Spjd /* 4269185029Spjd * Generate a random name of the form 'xxx.....' where each 4270185029Spjd * x is a random printable character and the dots are dots. 4271185029Spjd * There are 94 such characters, and the name length goes from 4272185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4273185029Spjd */ 4274185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4275168404Spjd 4276185029Spjd for (i = 0; i < 3; i++) 4277185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4278185029Spjd for (; i < namelen - 1; i++) 4279185029Spjd name[i] = '.'; 4280185029Spjd name[i] = '\0'; 4281168404Spjd 4282219089Spjd if ((namelen & 1) || micro) { 4283185029Spjd wsize = sizeof (txg); 4284185029Spjd wc = 1; 4285185029Spjd data = &txg; 4286185029Spjd } else { 4287185029Spjd wsize = 1; 4288185029Spjd wc = namelen; 4289185029Spjd data = string_value; 4290185029Spjd } 4291168404Spjd 4292185029Spjd count = -1ULL; 4293248571Smm VERIFY0(zap_count(os, object, &count)); 4294185029Spjd ASSERT(count != -1ULL); 4295168404Spjd 4296185029Spjd /* 4297185029Spjd * Select an operation: length, lookup, add, update, remove. 4298185029Spjd */ 4299185029Spjd i = ztest_random(5); 4300168404Spjd 4301185029Spjd if (i >= 2) { 4302185029Spjd tx = dmu_tx_create(os); 4303219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4304219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4305219089Spjd if (txg == 0) 4306185029Spjd return; 4307185029Spjd bcopy(name, string_value, namelen); 4308185029Spjd } else { 4309185029Spjd tx = NULL; 4310185029Spjd txg = 0; 4311185029Spjd bzero(string_value, namelen); 4312185029Spjd } 4313168404Spjd 4314185029Spjd switch (i) { 4315168404Spjd 4316185029Spjd case 0: 4317185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4318185029Spjd if (error == 0) { 4319185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4320185029Spjd ASSERT3U(wc, ==, zl_wc); 4321185029Spjd } else { 4322185029Spjd ASSERT3U(error, ==, ENOENT); 4323185029Spjd } 4324185029Spjd break; 4325168404Spjd 4326185029Spjd case 1: 4327185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4328185029Spjd if (error == 0) { 4329185029Spjd if (data == string_value && 4330185029Spjd bcmp(name, data, namelen) != 0) 4331185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4332185029Spjd name, data, namelen); 4333185029Spjd } else { 4334185029Spjd ASSERT3U(error, ==, ENOENT); 4335185029Spjd } 4336185029Spjd break; 4337168404Spjd 4338185029Spjd case 2: 4339185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4340185029Spjd ASSERT(error == 0 || error == EEXIST); 4341185029Spjd break; 4342168404Spjd 4343185029Spjd case 3: 4344185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4345185029Spjd break; 4346168404Spjd 4347185029Spjd case 4: 4348185029Spjd error = zap_remove(os, object, name, tx); 4349185029Spjd ASSERT(error == 0 || error == ENOENT); 4350185029Spjd break; 4351185029Spjd } 4352168404Spjd 4353185029Spjd if (tx != NULL) 4354185029Spjd dmu_tx_commit(tx); 4355168404Spjd} 4356168404Spjd 4357219089Spjd/* 4358219089Spjd * Commit callback data. 4359219089Spjd */ 4360219089Spjdtypedef struct ztest_cb_data { 4361219089Spjd list_node_t zcd_node; 4362219089Spjd uint64_t zcd_txg; 4363219089Spjd int zcd_expected_err; 4364219089Spjd boolean_t zcd_added; 4365219089Spjd boolean_t zcd_called; 4366219089Spjd spa_t *zcd_spa; 4367219089Spjd} ztest_cb_data_t; 4368219089Spjd 4369219089Spjd/* This is the actual commit callback function */ 4370219089Spjdstatic void 4371219089Spjdztest_commit_callback(void *arg, int error) 4372219089Spjd{ 4373219089Spjd ztest_cb_data_t *data = arg; 4374219089Spjd uint64_t synced_txg; 4375219089Spjd 4376219089Spjd VERIFY(data != NULL); 4377219089Spjd VERIFY3S(data->zcd_expected_err, ==, error); 4378219089Spjd VERIFY(!data->zcd_called); 4379219089Spjd 4380219089Spjd synced_txg = spa_last_synced_txg(data->zcd_spa); 4381219089Spjd if (data->zcd_txg > synced_txg) 4382219089Spjd fatal(0, "commit callback of txg %" PRIu64 " called prematurely" 4383219089Spjd ", last synced txg = %" PRIu64 "\n", data->zcd_txg, 4384219089Spjd synced_txg); 4385219089Spjd 4386219089Spjd data->zcd_called = B_TRUE; 4387219089Spjd 4388219089Spjd if (error == ECANCELED) { 4389240415Smm ASSERT0(data->zcd_txg); 4390219089Spjd ASSERT(!data->zcd_added); 4391219089Spjd 4392219089Spjd /* 4393219089Spjd * The private callback data should be destroyed here, but 4394219089Spjd * since we are going to check the zcd_called field after 4395219089Spjd * dmu_tx_abort(), we will destroy it there. 4396219089Spjd */ 4397219089Spjd return; 4398219089Spjd } 4399219089Spjd 4400219089Spjd /* Was this callback added to the global callback list? */ 4401219089Spjd if (!data->zcd_added) 4402219089Spjd goto out; 4403219089Spjd 4404219089Spjd ASSERT3U(data->zcd_txg, !=, 0); 4405219089Spjd 4406219089Spjd /* Remove our callback from the list */ 4407219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4408219089Spjd list_remove(&zcl.zcl_callbacks, data); 4409219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4410219089Spjd 4411219089Spjdout: 4412219089Spjd umem_free(data, sizeof (ztest_cb_data_t)); 4413219089Spjd} 4414219089Spjd 4415219089Spjd/* Allocate and initialize callback data structure */ 4416219089Spjdstatic ztest_cb_data_t * 4417219089Spjdztest_create_cb_data(objset_t *os, uint64_t txg) 4418219089Spjd{ 4419219089Spjd ztest_cb_data_t *cb_data; 4420219089Spjd 4421219089Spjd cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); 4422219089Spjd 4423219089Spjd cb_data->zcd_txg = txg; 4424219089Spjd cb_data->zcd_spa = dmu_objset_spa(os); 4425219089Spjd 4426219089Spjd return (cb_data); 4427219089Spjd} 4428219089Spjd 4429219089Spjd/* 4430219089Spjd * If a number of txgs equal to this threshold have been created after a commit 4431219089Spjd * callback has been registered but not called, then we assume there is an 4432219089Spjd * implementation bug. 4433219089Spjd */ 4434219089Spjd#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) 4435219089Spjd 4436219089Spjd/* 4437219089Spjd * Commit callback test. 4438219089Spjd */ 4439168404Spjdvoid 4440219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4441168404Spjd{ 4442219089Spjd objset_t *os = zd->zd_os; 4443219089Spjd ztest_od_t od[1]; 4444219089Spjd dmu_tx_t *tx; 4445219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4446219089Spjd uint64_t old_txg, txg; 4447219089Spjd int i, error; 4448219089Spjd 4449219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4450219089Spjd 4451219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4452219089Spjd return; 4453219089Spjd 4454219089Spjd tx = dmu_tx_create(os); 4455219089Spjd 4456219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4457219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4458219089Spjd 4459219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4460219089Spjd 4461219089Spjd /* Every once in a while, abort the transaction on purpose */ 4462219089Spjd if (ztest_random(100) == 0) 4463219089Spjd error = -1; 4464219089Spjd 4465219089Spjd if (!error) 4466219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4467219089Spjd 4468219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4469219089Spjd 4470219089Spjd cb_data[0]->zcd_txg = txg; 4471219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4472219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4473219089Spjd 4474219089Spjd if (error) { 4475219089Spjd /* 4476219089Spjd * It's not a strict requirement to call the registered 4477219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4478219089Spjd * it's supposed to happen in the current implementation 4479219089Spjd * so we will check for that. 4480219089Spjd */ 4481219089Spjd for (i = 0; i < 2; i++) { 4482219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4483219089Spjd VERIFY(!cb_data[i]->zcd_called); 4484219089Spjd } 4485219089Spjd 4486219089Spjd dmu_tx_abort(tx); 4487219089Spjd 4488219089Spjd for (i = 0; i < 2; i++) { 4489219089Spjd VERIFY(cb_data[i]->zcd_called); 4490219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4491219089Spjd } 4492219089Spjd 4493219089Spjd return; 4494219089Spjd } 4495219089Spjd 4496219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4497219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4498219089Spjd 4499219089Spjd /* 4500219089Spjd * Read existing data to make sure there isn't a future leak. 4501219089Spjd */ 4502219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4503219089Spjd &old_txg, DMU_READ_PREFETCH)); 4504219089Spjd 4505219089Spjd if (old_txg > txg) 4506219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4507219089Spjd old_txg, txg); 4508219089Spjd 4509219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4510219089Spjd 4511219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4512219089Spjd 4513219089Spjd /* 4514219089Spjd * Since commit callbacks don't have any ordering requirement and since 4515219089Spjd * it is theoretically possible for a commit callback to be called 4516219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4517219089Spjd * synced, it is difficult to reliably determine whether a commit 4518219089Spjd * callback hasn't been called due to high load or due to a flawed 4519219089Spjd * implementation. 4520219089Spjd * 4521219089Spjd * In practice, we will assume that if after a certain number of txgs a 4522219089Spjd * commit callback hasn't been called, then most likely there's an 4523219089Spjd * implementation bug.. 4524219089Spjd */ 4525219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4526219089Spjd if (tmp_cb != NULL && 4527251635Sdelphij (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { 4528219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4529219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4530219089Spjd } 4531219089Spjd 4532219089Spjd /* 4533219089Spjd * Let's find the place to insert our callbacks. 4534219089Spjd * 4535219089Spjd * Even though the list is ordered by txg, it is possible for the 4536219089Spjd * insertion point to not be the end because our txg may already be 4537219089Spjd * quiescing at this point and other callbacks in the open txg 4538219089Spjd * (from other objsets) may have sneaked in. 4539219089Spjd */ 4540219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4541219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4542219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4543219089Spjd 4544219089Spjd /* Add the 3 callbacks to the list */ 4545219089Spjd for (i = 0; i < 3; i++) { 4546219089Spjd if (tmp_cb == NULL) 4547219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4548219089Spjd else 4549219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4550219089Spjd cb_data[i]); 4551219089Spjd 4552219089Spjd cb_data[i]->zcd_added = B_TRUE; 4553219089Spjd VERIFY(!cb_data[i]->zcd_called); 4554219089Spjd 4555219089Spjd tmp_cb = cb_data[i]; 4556219089Spjd } 4557219089Spjd 4558219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4559219089Spjd 4560219089Spjd dmu_tx_commit(tx); 4561219089Spjd} 4562219089Spjd 4563219089Spjd/* ARGSUSED */ 4564219089Spjdvoid 4565219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4566219089Spjd{ 4567219089Spjd zfs_prop_t proplist[] = { 4568219089Spjd ZFS_PROP_CHECKSUM, 4569219089Spjd ZFS_PROP_COMPRESSION, 4570219089Spjd ZFS_PROP_COPIES, 4571219089Spjd ZFS_PROP_DEDUP 4572219089Spjd }; 4573219089Spjd 4574236143Smm (void) rw_rdlock(&ztest_name_lock); 4575219089Spjd 4576219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4577219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4578219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4579219089Spjd 4580236143Smm (void) rw_unlock(&ztest_name_lock); 4581219089Spjd} 4582219089Spjd 4583219089Spjd/* ARGSUSED */ 4584219089Spjdvoid 4585219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4586219089Spjd{ 4587219089Spjd nvlist_t *props = NULL; 4588219089Spjd 4589236143Smm (void) rw_rdlock(&ztest_name_lock); 4590219089Spjd 4591236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4592219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4593219089Spjd 4594240415Smm VERIFY0(spa_prop_get(ztest_spa, &props)); 4595219089Spjd 4596236143Smm if (ztest_opts.zo_verbose >= 6) 4597219089Spjd dump_nvlist(props, 4); 4598219089Spjd 4599219089Spjd nvlist_free(props); 4600219089Spjd 4601236143Smm (void) rw_unlock(&ztest_name_lock); 4602219089Spjd} 4603219089Spjd 4604248571Smmstatic int 4605248571Smmuser_release_one(const char *snapname, const char *holdname) 4606248571Smm{ 4607248571Smm nvlist_t *snaps, *holds; 4608248571Smm int error; 4609248571Smm 4610248571Smm snaps = fnvlist_alloc(); 4611248571Smm holds = fnvlist_alloc(); 4612248571Smm fnvlist_add_boolean(holds, holdname); 4613248571Smm fnvlist_add_nvlist(snaps, snapname, holds); 4614248571Smm fnvlist_free(holds); 4615248571Smm error = dsl_dataset_user_release(snaps, NULL); 4616248571Smm fnvlist_free(snaps); 4617248571Smm return (error); 4618248571Smm} 4619248571Smm 4620219089Spjd/* 4621219089Spjd * Test snapshot hold/release and deferred destroy. 4622219089Spjd */ 4623219089Spjdvoid 4624219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4625219089Spjd{ 4626219089Spjd int error; 4627219089Spjd objset_t *os = zd->zd_os; 4628219089Spjd objset_t *origin; 4629219089Spjd char snapname[100]; 4630219089Spjd char fullname[100]; 4631219089Spjd char clonename[100]; 4632219089Spjd char tag[100]; 4633168404Spjd char osname[MAXNAMELEN]; 4634248571Smm nvlist_t *holds; 4635168404Spjd 4636236143Smm (void) rw_rdlock(&ztest_name_lock); 4637168404Spjd 4638168404Spjd dmu_objset_name(os, osname); 4639168404Spjd 4640248571Smm (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); 4641248571Smm (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); 4642248571Smm (void) snprintf(clonename, sizeof (clonename), 4643248571Smm "%s/ch1_%llu", osname, id); 4644248571Smm (void) snprintf(tag, sizeof (tag), "tag_%llu", id); 4645219089Spjd 4646219089Spjd /* 4647219089Spjd * Clean up from any previous run. 4648219089Spjd */ 4649248571Smm error = dsl_destroy_head(clonename); 4650248571Smm if (error != ENOENT) 4651248571Smm ASSERT0(error); 4652248571Smm error = user_release_one(fullname, tag); 4653248571Smm if (error != ESRCH && error != ENOENT) 4654248571Smm ASSERT0(error); 4655248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4656248571Smm if (error != ENOENT) 4657248571Smm ASSERT0(error); 4658219089Spjd 4659219089Spjd /* 4660219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4661219089Spjd * destroy clone, verify snap was also destroyed. 4662219089Spjd */ 4663248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4664219089Spjd if (error) { 4665219089Spjd if (error == ENOSPC) { 4666219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4667219089Spjd goto out; 4668168404Spjd } 4669219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4670219089Spjd } 4671168404Spjd 4672248571Smm error = dmu_objset_clone(clonename, fullname); 4673219089Spjd if (error) { 4674168404Spjd if (error == ENOSPC) { 4675219089Spjd ztest_record_enospc("dmu_objset_clone"); 4676219089Spjd goto out; 4677168404Spjd } 4678219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4679219089Spjd } 4680168404Spjd 4681248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4682219089Spjd if (error) { 4683248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4684219089Spjd fullname, error); 4685219089Spjd } 4686168404Spjd 4687248571Smm error = dsl_destroy_head(clonename); 4688219089Spjd if (error) 4689248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); 4690168404Spjd 4691219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4692219089Spjd if (error != ENOENT) 4693219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4694168404Spjd 4695219089Spjd /* 4696219089Spjd * Create snapshot, add temporary hold, verify that we can't 4697219089Spjd * destroy a held snapshot, mark for deferred destroy, 4698219089Spjd * release hold, verify snapshot was destroyed. 4699219089Spjd */ 4700248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4701219089Spjd if (error) { 4702219089Spjd if (error == ENOSPC) { 4703219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4704219089Spjd goto out; 4705168404Spjd } 4706219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4707168404Spjd } 4708168404Spjd 4709248571Smm holds = fnvlist_alloc(); 4710248571Smm fnvlist_add_string(holds, fullname, tag); 4711248571Smm error = dsl_dataset_user_hold(holds, 0, NULL); 4712248571Smm fnvlist_free(holds); 4713248571Smm 4714219089Spjd if (error) 4715219089Spjd fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); 4716219089Spjd 4717248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4718219089Spjd if (error != EBUSY) { 4719248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", 4720219089Spjd fullname, error); 4721219089Spjd } 4722219089Spjd 4723248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4724219089Spjd if (error) { 4725248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4726219089Spjd fullname, error); 4727219089Spjd } 4728219089Spjd 4729248571Smm error = user_release_one(fullname, tag); 4730219089Spjd if (error) 4731251646Sdelphij fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); 4732219089Spjd 4733248571Smm VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); 4734219089Spjd 4735219089Spjdout: 4736236143Smm (void) rw_unlock(&ztest_name_lock); 4737168404Spjd} 4738168404Spjd 4739168404Spjd/* 4740168404Spjd * Inject random faults into the on-disk data. 4741168404Spjd */ 4742219089Spjd/* ARGSUSED */ 4743168404Spjdvoid 4744219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 4745168404Spjd{ 4746219089Spjd ztest_shared_t *zs = ztest_shared; 4747236143Smm spa_t *spa = ztest_spa; 4748168404Spjd int fd; 4749168404Spjd uint64_t offset; 4750219089Spjd uint64_t leaves; 4751168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 4752168404Spjd uint64_t top, leaf; 4753168404Spjd char path0[MAXPATHLEN]; 4754168404Spjd char pathrand[MAXPATHLEN]; 4755168404Spjd size_t fsize; 4756168404Spjd int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 4757168404Spjd int iters = 1000; 4758219089Spjd int maxfaults; 4759219089Spjd int mirror_save; 4760185029Spjd vdev_t *vd0 = NULL; 4761168404Spjd uint64_t guid0 = 0; 4762219089Spjd boolean_t islog = B_FALSE; 4763168404Spjd 4764236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4765219089Spjd maxfaults = MAXFAULTS(); 4766236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 4767219089Spjd mirror_save = zs->zs_mirrors; 4768236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4769219089Spjd 4770185029Spjd ASSERT(leaves >= 1); 4771168404Spjd 4772168404Spjd /* 4773254074Sdelphij * Grab the name lock as reader. There are some operations 4774254074Sdelphij * which don't like to have their vdevs changed while 4775254074Sdelphij * they are in progress (i.e. spa_change_guid). Those 4776254074Sdelphij * operations will have grabbed the name lock as writer. 4777254074Sdelphij */ 4778254074Sdelphij (void) rw_rdlock(&ztest_name_lock); 4779254074Sdelphij 4780254074Sdelphij /* 4781185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 4782168404Spjd */ 4783185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4784168404Spjd 4785185029Spjd if (ztest_random(2) == 0) { 4786185029Spjd /* 4787219089Spjd * Inject errors on a normal data device or slog device. 4788185029Spjd */ 4789219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 4790219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 4791168404Spjd 4792185029Spjd /* 4793185029Spjd * Generate paths to the first leaf in this top-level vdev, 4794185029Spjd * and to the random leaf we selected. We'll induce transient 4795185029Spjd * write failures and random online/offline activity on leaf 0, 4796185029Spjd * and we'll write random garbage to the randomly chosen leaf. 4797185029Spjd */ 4798185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 4799236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4800236143Smm top * leaves + zs->zs_splits); 4801185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 4802236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4803236143Smm top * leaves + leaf); 4804168404Spjd 4805185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 4806219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 4807219089Spjd islog = B_TRUE; 4808219089Spjd 4809254074Sdelphij /* 4810254074Sdelphij * If the top-level vdev needs to be resilvered 4811254074Sdelphij * then we only allow faults on the device that is 4812254074Sdelphij * resilvering. 4813254074Sdelphij */ 4814254074Sdelphij if (vd0 != NULL && maxfaults != 1 && 4815254074Sdelphij (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || 4816254112Sdelphij vd0->vdev_resilver_txg != 0)) { 4817185029Spjd /* 4818185029Spjd * Make vd0 explicitly claim to be unreadable, 4819185029Spjd * or unwriteable, or reach behind its back 4820185029Spjd * and close the underlying fd. We can do this if 4821185029Spjd * maxfaults == 0 because we'll fail and reexecute, 4822185029Spjd * and we can do it if maxfaults >= 2 because we'll 4823185029Spjd * have enough redundancy. If maxfaults == 1, the 4824185029Spjd * combination of this with injection of random data 4825185029Spjd * corruption below exceeds the pool's fault tolerance. 4826185029Spjd */ 4827185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 4828168404Spjd 4829185029Spjd if (vf != NULL && ztest_random(3) == 0) { 4830185029Spjd (void) close(vf->vf_vnode->v_fd); 4831185029Spjd vf->vf_vnode->v_fd = -1; 4832185029Spjd } else if (ztest_random(2) == 0) { 4833185029Spjd vd0->vdev_cant_read = B_TRUE; 4834185029Spjd } else { 4835185029Spjd vd0->vdev_cant_write = B_TRUE; 4836185029Spjd } 4837185029Spjd guid0 = vd0->vdev_guid; 4838185029Spjd } 4839185029Spjd } else { 4840185029Spjd /* 4841185029Spjd * Inject errors on an l2cache device. 4842185029Spjd */ 4843185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 4844168404Spjd 4845185029Spjd if (sav->sav_count == 0) { 4846185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4847254074Sdelphij (void) rw_unlock(&ztest_name_lock); 4848185029Spjd return; 4849185029Spjd } 4850185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 4851168404Spjd guid0 = vd0->vdev_guid; 4852185029Spjd (void) strcpy(path0, vd0->vdev_path); 4853185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 4854185029Spjd 4855185029Spjd leaf = 0; 4856185029Spjd leaves = 1; 4857185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 4858168404Spjd } 4859168404Spjd 4860185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4861254074Sdelphij (void) rw_unlock(&ztest_name_lock); 4862185029Spjd 4863168404Spjd /* 4864219089Spjd * If we can tolerate two or more faults, or we're dealing 4865219089Spjd * with a slog, randomly online/offline vd0. 4866168404Spjd */ 4867219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 4868209962Smm if (ztest_random(10) < 6) { 4869209962Smm int flags = (ztest_random(2) == 0 ? 4870209962Smm ZFS_OFFLINE_TEMPORARY : 0); 4871219089Spjd 4872219089Spjd /* 4873219089Spjd * We have to grab the zs_name_lock as writer to 4874219089Spjd * prevent a race between offlining a slog and 4875219089Spjd * destroying a dataset. Offlining the slog will 4876219089Spjd * grab a reference on the dataset which may cause 4877219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 4878219089Spjd * leaving the dataset in an inconsistent state. 4879219089Spjd */ 4880219089Spjd if (islog) 4881236143Smm (void) rw_wrlock(&ztest_name_lock); 4882219089Spjd 4883209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 4884219089Spjd 4885219089Spjd if (islog) 4886236143Smm (void) rw_unlock(&ztest_name_lock); 4887209962Smm } else { 4888242845Sdelphij /* 4889242845Sdelphij * Ideally we would like to be able to randomly 4890242845Sdelphij * call vdev_[on|off]line without holding locks 4891242845Sdelphij * to force unpredictable failures but the side 4892242845Sdelphij * effects of vdev_[on|off]line prevent us from 4893242845Sdelphij * doing so. We grab the ztest_vdev_lock here to 4894242845Sdelphij * prevent a race between injection testing and 4895242845Sdelphij * aux_vdev removal. 4896242845Sdelphij */ 4897242845Sdelphij VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4898209962Smm (void) vdev_online(spa, guid0, 0, NULL); 4899242845Sdelphij VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4900209962Smm } 4901168404Spjd } 4902168404Spjd 4903219089Spjd if (maxfaults == 0) 4904219089Spjd return; 4905219089Spjd 4906168404Spjd /* 4907168404Spjd * We have at least single-fault tolerance, so inject data corruption. 4908168404Spjd */ 4909168404Spjd fd = open(pathrand, O_RDWR); 4910168404Spjd 4911168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 4912168404Spjd return; 4913168404Spjd 4914168404Spjd fsize = lseek(fd, 0, SEEK_END); 4915168404Spjd 4916168404Spjd while (--iters != 0) { 4917168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 4918168404Spjd (leaves << bshift) + (leaf << bshift) + 4919168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 4920168404Spjd 4921168404Spjd if (offset >= fsize) 4922168404Spjd continue; 4923168404Spjd 4924236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4925219089Spjd if (mirror_save != zs->zs_mirrors) { 4926236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4927219089Spjd (void) close(fd); 4928219089Spjd return; 4929219089Spjd } 4930168404Spjd 4931168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 4932168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 4933168404Spjd offset, pathrand); 4934219089Spjd 4935236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4936219089Spjd 4937236143Smm if (ztest_opts.zo_verbose >= 7) 4938219089Spjd (void) printf("injected bad word into %s," 4939219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 4940168404Spjd } 4941168404Spjd 4942168404Spjd (void) close(fd); 4943168404Spjd} 4944168404Spjd 4945168404Spjd/* 4946219089Spjd * Verify that DDT repair works as expected. 4947219089Spjd */ 4948219089Spjdvoid 4949219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 4950219089Spjd{ 4951219089Spjd ztest_shared_t *zs = ztest_shared; 4952236143Smm spa_t *spa = ztest_spa; 4953219089Spjd objset_t *os = zd->zd_os; 4954219089Spjd ztest_od_t od[1]; 4955219089Spjd uint64_t object, blocksize, txg, pattern, psize; 4956219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 4957219089Spjd dmu_buf_t *db; 4958219089Spjd dmu_tx_t *tx; 4959219089Spjd void *buf; 4960219089Spjd blkptr_t blk; 4961219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 4962219089Spjd 4963219089Spjd blocksize = ztest_random_blocksize(); 4964219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 4965219089Spjd 4966219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4967219089Spjd 4968219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4969219089Spjd return; 4970219089Spjd 4971219089Spjd /* 4972219089Spjd * Take the name lock as writer to prevent anyone else from changing 4973219089Spjd * the pool and dataset properies we need to maintain during this test. 4974219089Spjd */ 4975236143Smm (void) rw_wrlock(&ztest_name_lock); 4976219089Spjd 4977219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 4978219089Spjd B_FALSE) != 0 || 4979219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 4980219089Spjd B_FALSE) != 0) { 4981236143Smm (void) rw_unlock(&ztest_name_lock); 4982219089Spjd return; 4983219089Spjd } 4984219089Spjd 4985219089Spjd object = od[0].od_object; 4986219089Spjd blocksize = od[0].od_blocksize; 4987228103Smm pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); 4988219089Spjd 4989219089Spjd ASSERT(object != 0); 4990219089Spjd 4991219089Spjd tx = dmu_tx_create(os); 4992219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 4993219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 4994219089Spjd if (txg == 0) { 4995236143Smm (void) rw_unlock(&ztest_name_lock); 4996219089Spjd return; 4997219089Spjd } 4998219089Spjd 4999219089Spjd /* 5000219089Spjd * Write all the copies of our block. 5001219089Spjd */ 5002219089Spjd for (int i = 0; i < copies; i++) { 5003219089Spjd uint64_t offset = i * blocksize; 5004248571Smm int error = dmu_buf_hold(os, object, offset, FTAG, &db, 5005248571Smm DMU_READ_NO_PREFETCH); 5006248571Smm if (error != 0) { 5007248571Smm fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", 5008248571Smm os, (long long)object, (long long) offset, error); 5009248571Smm } 5010219089Spjd ASSERT(db->db_offset == offset); 5011219089Spjd ASSERT(db->db_size == blocksize); 5012219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 5013219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 5014219089Spjd dmu_buf_will_fill(db, tx); 5015219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 5016219089Spjd dmu_buf_rele(db, FTAG); 5017219089Spjd } 5018219089Spjd 5019219089Spjd dmu_tx_commit(tx); 5020219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 5021219089Spjd 5022219089Spjd /* 5023219089Spjd * Find out what block we got. 5024219089Spjd */ 5025243524Smm VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, 5026243524Smm DMU_READ_NO_PREFETCH)); 5027219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 5028219089Spjd dmu_buf_rele(db, FTAG); 5029219089Spjd 5030219089Spjd /* 5031219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 5032219089Spjd */ 5033219089Spjd psize = BP_GET_PSIZE(&blk); 5034219089Spjd buf = zio_buf_alloc(psize); 5035219089Spjd ztest_pattern_set(buf, psize, ~pattern); 5036219089Spjd 5037219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 5038219089Spjd buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 5039219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 5040219089Spjd 5041219089Spjd zio_buf_free(buf, psize); 5042219089Spjd 5043236143Smm (void) rw_unlock(&ztest_name_lock); 5044219089Spjd} 5045219089Spjd 5046219089Spjd/* 5047168404Spjd * Scrub the pool. 5048168404Spjd */ 5049219089Spjd/* ARGSUSED */ 5050168404Spjdvoid 5051219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 5052168404Spjd{ 5053236143Smm spa_t *spa = ztest_spa; 5054168404Spjd 5055219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5056219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 5057219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5058168404Spjd} 5059168404Spjd 5060168404Spjd/* 5061228103Smm * Change the guid for the pool. 5062228103Smm */ 5063228103Smm/* ARGSUSED */ 5064228103Smmvoid 5065228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 5066228103Smm{ 5067236143Smm spa_t *spa = ztest_spa; 5068228103Smm uint64_t orig, load; 5069239620Smm int error; 5070228103Smm 5071228103Smm orig = spa_guid(spa); 5072228103Smm load = spa_load_guid(spa); 5073239620Smm 5074239620Smm (void) rw_wrlock(&ztest_name_lock); 5075239620Smm error = spa_change_guid(spa); 5076239620Smm (void) rw_unlock(&ztest_name_lock); 5077239620Smm 5078239620Smm if (error != 0) 5079228103Smm return; 5080228103Smm 5081243505Smm if (ztest_opts.zo_verbose >= 4) { 5082228103Smm (void) printf("Changed guid old %llu -> %llu\n", 5083228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 5084228103Smm } 5085228103Smm 5086228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 5087228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 5088228103Smm} 5089228103Smm 5090228103Smm/* 5091168404Spjd * Rename the pool to a different name and then rename it back. 5092168404Spjd */ 5093219089Spjd/* ARGSUSED */ 5094168404Spjdvoid 5095219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 5096168404Spjd{ 5097168404Spjd char *oldname, *newname; 5098168404Spjd spa_t *spa; 5099168404Spjd 5100236143Smm (void) rw_wrlock(&ztest_name_lock); 5101168404Spjd 5102236143Smm oldname = ztest_opts.zo_pool; 5103168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 5104168404Spjd (void) strcpy(newname, oldname); 5105168404Spjd (void) strcat(newname, "_tmp"); 5106168404Spjd 5107168404Spjd /* 5108168404Spjd * Do the rename 5109168404Spjd */ 5110219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 5111168404Spjd 5112168404Spjd /* 5113168404Spjd * Try to open it under the old name, which shouldn't exist 5114168404Spjd */ 5115219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5116168404Spjd 5117168404Spjd /* 5118168404Spjd * Open it under the new name and make sure it's still the same spa_t. 5119168404Spjd */ 5120219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5121168404Spjd 5122236143Smm ASSERT(spa == ztest_spa); 5123168404Spjd spa_close(spa, FTAG); 5124168404Spjd 5125168404Spjd /* 5126168404Spjd * Rename it back to the original 5127168404Spjd */ 5128219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 5129168404Spjd 5130168404Spjd /* 5131168404Spjd * Make sure it can still be opened 5132168404Spjd */ 5133219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5134168404Spjd 5135236143Smm ASSERT(spa == ztest_spa); 5136168404Spjd spa_close(spa, FTAG); 5137168404Spjd 5138168404Spjd umem_free(newname, strlen(newname) + 1); 5139168404Spjd 5140236143Smm (void) rw_unlock(&ztest_name_lock); 5141168404Spjd} 5142168404Spjd 5143168404Spjd/* 5144219089Spjd * Verify pool integrity by running zdb. 5145168404Spjd */ 5146168404Spjdstatic void 5147219089Spjdztest_run_zdb(char *pool) 5148168404Spjd{ 5149168404Spjd int status; 5150168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 5151168404Spjd char zbuf[1024]; 5152168404Spjd char *bin; 5153185029Spjd char *ztest; 5154185029Spjd char *isa; 5155185029Spjd int isalen; 5156168404Spjd FILE *fp; 5157168404Spjd 5158214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 5159168404Spjd 5160168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 5161168404Spjd bin = strstr(zdb, "/usr/bin/"); 5162185029Spjd ztest = strstr(bin, "/ztest"); 5163185029Spjd isa = bin + 8; 5164185029Spjd isalen = ztest - isa; 5165185029Spjd isa = strdup(isa); 5166168404Spjd /* LINTED */ 5167185029Spjd (void) sprintf(bin, 5168219089Spjd "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s", 5169185029Spjd isalen, 5170185029Spjd isa, 5171236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 5172236143Smm ztest_opts.zo_verbose >= 4 ? "v" : "", 5173219089Spjd spa_config_path, 5174208047Smm pool); 5175185029Spjd free(isa); 5176168404Spjd 5177236143Smm if (ztest_opts.zo_verbose >= 5) 5178168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 5179168404Spjd 5180168404Spjd fp = popen(zdb, "r"); 5181168404Spjd assert(fp != NULL); 5182168404Spjd 5183168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 5184236143Smm if (ztest_opts.zo_verbose >= 3) 5185168404Spjd (void) printf("%s", zbuf); 5186168404Spjd 5187168404Spjd status = pclose(fp); 5188168404Spjd 5189168404Spjd if (status == 0) 5190168404Spjd return; 5191168404Spjd 5192168404Spjd ztest_dump_core = 0; 5193168404Spjd if (WIFEXITED(status)) 5194168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 5195168404Spjd else 5196168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 5197168404Spjd} 5198168404Spjd 5199168404Spjdstatic void 5200168404Spjdztest_walk_pool_directory(char *header) 5201168404Spjd{ 5202168404Spjd spa_t *spa = NULL; 5203168404Spjd 5204236143Smm if (ztest_opts.zo_verbose >= 6) 5205168404Spjd (void) printf("%s\n", header); 5206168404Spjd 5207168404Spjd mutex_enter(&spa_namespace_lock); 5208168404Spjd while ((spa = spa_next(spa)) != NULL) 5209236143Smm if (ztest_opts.zo_verbose >= 6) 5210168404Spjd (void) printf("\t%s\n", spa_name(spa)); 5211168404Spjd mutex_exit(&spa_namespace_lock); 5212168404Spjd} 5213168404Spjd 5214168404Spjdstatic void 5215168404Spjdztest_spa_import_export(char *oldname, char *newname) 5216168404Spjd{ 5217209962Smm nvlist_t *config, *newconfig; 5218168404Spjd uint64_t pool_guid; 5219168404Spjd spa_t *spa; 5220248571Smm int error; 5221168404Spjd 5222236143Smm if (ztest_opts.zo_verbose >= 4) { 5223168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5224168404Spjd oldname, newname); 5225168404Spjd } 5226168404Spjd 5227168404Spjd /* 5228168404Spjd * Clean up from previous runs. 5229168404Spjd */ 5230168404Spjd (void) spa_destroy(newname); 5231168404Spjd 5232168404Spjd /* 5233168404Spjd * Get the pool's configuration and guid. 5234168404Spjd */ 5235219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5236168404Spjd 5237209962Smm /* 5238209962Smm * Kick off a scrub to tickle scrub/export races. 5239209962Smm */ 5240209962Smm if (ztest_random(2) == 0) 5241219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5242209962Smm 5243168404Spjd pool_guid = spa_guid(spa); 5244168404Spjd spa_close(spa, FTAG); 5245168404Spjd 5246168404Spjd ztest_walk_pool_directory("pools before export"); 5247168404Spjd 5248168404Spjd /* 5249168404Spjd * Export it. 5250168404Spjd */ 5251219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5252168404Spjd 5253168404Spjd ztest_walk_pool_directory("pools after export"); 5254168404Spjd 5255168404Spjd /* 5256209962Smm * Try to import it. 5257209962Smm */ 5258209962Smm newconfig = spa_tryimport(config); 5259209962Smm ASSERT(newconfig != NULL); 5260209962Smm nvlist_free(newconfig); 5261209962Smm 5262209962Smm /* 5263168404Spjd * Import it under the new name. 5264168404Spjd */ 5265248571Smm error = spa_import(newname, config, NULL, 0); 5266248571Smm if (error != 0) { 5267248571Smm dump_nvlist(config, 0); 5268248571Smm fatal(B_FALSE, "couldn't import pool %s as %s: error %u", 5269248571Smm oldname, newname, error); 5270248571Smm } 5271168404Spjd 5272168404Spjd ztest_walk_pool_directory("pools after import"); 5273168404Spjd 5274168404Spjd /* 5275168404Spjd * Try to import it again -- should fail with EEXIST. 5276168404Spjd */ 5277219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5278168404Spjd 5279168404Spjd /* 5280168404Spjd * Try to import it under a different name -- should fail with EEXIST. 5281168404Spjd */ 5282219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5283168404Spjd 5284168404Spjd /* 5285168404Spjd * Verify that the pool is no longer visible under the old name. 5286168404Spjd */ 5287219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5288168404Spjd 5289168404Spjd /* 5290168404Spjd * Verify that we can open and close the pool using the new name. 5291168404Spjd */ 5292219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5293168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5294168404Spjd spa_close(spa, FTAG); 5295168404Spjd 5296168404Spjd nvlist_free(config); 5297168404Spjd} 5298168404Spjd 5299209962Smmstatic void 5300209962Smmztest_resume(spa_t *spa) 5301209962Smm{ 5302236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5303219089Spjd (void) printf("resuming from suspended state\n"); 5304219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5305219089Spjd vdev_clear(spa, NULL); 5306219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5307219089Spjd (void) zio_resume(spa); 5308209962Smm} 5309209962Smm 5310168404Spjdstatic void * 5311209962Smmztest_resume_thread(void *arg) 5312185029Spjd{ 5313185029Spjd spa_t *spa = arg; 5314185029Spjd 5315185029Spjd while (!ztest_exiting) { 5316219089Spjd if (spa_suspended(spa)) 5317219089Spjd ztest_resume(spa); 5318219089Spjd (void) poll(NULL, 0, 100); 5319185029Spjd } 5320185029Spjd return (NULL); 5321185029Spjd} 5322185029Spjd 5323185029Spjdstatic void * 5324219089Spjdztest_deadman_thread(void *arg) 5325219089Spjd{ 5326219089Spjd ztest_shared_t *zs = arg; 5327254074Sdelphij spa_t *spa = ztest_spa; 5328254074Sdelphij hrtime_t delta, total = 0; 5329219089Spjd 5330254074Sdelphij for (;;) { 5331258632Savg delta = zs->zs_thread_stop - zs->zs_thread_start + 5332258632Savg MSEC2NSEC(zfs_deadman_synctime_ms); 5333219089Spjd 5334258632Savg (void) poll(NULL, 0, (int)NSEC2MSEC(delta)); 5335219089Spjd 5336254074Sdelphij /* 5337254074Sdelphij * If the pool is suspended then fail immediately. Otherwise, 5338254074Sdelphij * check to see if the pool is making any progress. If 5339254074Sdelphij * vdev_deadman() discovers that there hasn't been any recent 5340254074Sdelphij * I/Os then it will end up aborting the tests. 5341254074Sdelphij */ 5342258717Savg if (spa_suspended(spa) || spa->spa_root_vdev == NULL) { 5343254074Sdelphij fatal(0, "aborting test after %llu seconds because " 5344254074Sdelphij "pool has transitioned to a suspended state.", 5345258632Savg zfs_deadman_synctime_ms / 1000); 5346254074Sdelphij return (NULL); 5347254074Sdelphij } 5348254074Sdelphij vdev_deadman(spa->spa_root_vdev); 5349219089Spjd 5350258632Savg total += zfs_deadman_synctime_ms/1000; 5351254074Sdelphij (void) printf("ztest has been running for %lld seconds\n", 5352254074Sdelphij total); 5353254074Sdelphij } 5354219089Spjd} 5355219089Spjd 5356219089Spjdstatic void 5357236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5358219089Spjd{ 5359236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5360236143Smm ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5361219089Spjd hrtime_t functime = gethrtime(); 5362219089Spjd 5363219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5364219089Spjd zi->zi_func(zd, id); 5365219089Spjd 5366219089Spjd functime = gethrtime() - functime; 5367219089Spjd 5368236143Smm atomic_add_64(&zc->zc_count, 1); 5369236143Smm atomic_add_64(&zc->zc_time, functime); 5370219089Spjd 5371236143Smm if (ztest_opts.zo_verbose >= 4) { 5372219089Spjd Dl_info dli; 5373219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5374219089Spjd (void) printf("%6.2f sec in %s\n", 5375219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5376219089Spjd } 5377219089Spjd} 5378219089Spjd 5379219089Spjdstatic void * 5380168404Spjdztest_thread(void *arg) 5381168404Spjd{ 5382236143Smm int rand; 5383219089Spjd uint64_t id = (uintptr_t)arg; 5384168404Spjd ztest_shared_t *zs = ztest_shared; 5385219089Spjd uint64_t call_next; 5386219089Spjd hrtime_t now; 5387168404Spjd ztest_info_t *zi; 5388236143Smm ztest_shared_callstate_t *zc; 5389168404Spjd 5390219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5391168404Spjd /* 5392168404Spjd * See if it's time to force a crash. 5393168404Spjd */ 5394219089Spjd if (now > zs->zs_thread_kill) 5395219089Spjd ztest_kill(zs); 5396168404Spjd 5397168404Spjd /* 5398219089Spjd * If we're getting ENOSPC with some regularity, stop. 5399168404Spjd */ 5400219089Spjd if (zs->zs_enospc_count > 10) 5401219089Spjd break; 5402168404Spjd 5403168404Spjd /* 5404219089Spjd * Pick a random function to execute. 5405168404Spjd */ 5406236143Smm rand = ztest_random(ZTEST_FUNCS); 5407236143Smm zi = &ztest_info[rand]; 5408236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5409236143Smm call_next = zc->zc_next; 5410168404Spjd 5411219089Spjd if (now >= call_next && 5412236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5413236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5414236143Smm ztest_execute(rand, zi, id); 5415236143Smm } 5416219089Spjd } 5417168404Spjd 5418219089Spjd return (NULL); 5419219089Spjd} 5420168404Spjd 5421219089Spjdstatic void 5422219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5423219089Spjd{ 5424219089Spjd (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); 5425219089Spjd} 5426168404Spjd 5427219089Spjdstatic void 5428236143Smmztest_dataset_destroy(int d) 5429219089Spjd{ 5430219089Spjd char name[MAXNAMELEN]; 5431168404Spjd 5432236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5433168404Spjd 5434236143Smm if (ztest_opts.zo_verbose >= 3) 5435219089Spjd (void) printf("Destroying %s to free up space\n", name); 5436168404Spjd 5437219089Spjd /* 5438219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5439219089Spjd * ztest thread t operates on dataset (t % zopt_datasets), 5440219089Spjd * so there may be more than one thing to clean up. 5441219089Spjd */ 5442236143Smm for (int t = d; t < ztest_opts.zo_threads; 5443236143Smm t += ztest_opts.zo_datasets) { 5444219089Spjd ztest_dsl_dataset_cleanup(name, t); 5445236143Smm } 5446219089Spjd 5447219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5448219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5449219089Spjd} 5450219089Spjd 5451219089Spjdstatic void 5452219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5453219089Spjd{ 5454219089Spjd uint64_t usedobjs, dirobjs, scratch; 5455219089Spjd 5456219089Spjd /* 5457219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset. 5458219089Spjd * Therefore, the number of objects in use should equal the 5459219089Spjd * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. 5460219089Spjd * If not, we have an object leak. 5461219089Spjd * 5462219089Spjd * Note that we can only check this in ztest_dataset_open(), 5463219089Spjd * when the open-context and syncing-context values agree. 5464219089Spjd * That's because zap_count() returns the open-context value, 5465219089Spjd * while dmu_objset_space() returns the rootbp fill count. 5466219089Spjd */ 5467219089Spjd VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); 5468219089Spjd dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); 5469219089Spjd ASSERT3U(dirobjs + 1, ==, usedobjs); 5470219089Spjd} 5471219089Spjd 5472219089Spjdstatic int 5473236143Smmztest_dataset_open(int d) 5474219089Spjd{ 5475236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5476236143Smm uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; 5477219089Spjd objset_t *os; 5478219089Spjd zilog_t *zilog; 5479219089Spjd char name[MAXNAMELEN]; 5480219089Spjd int error; 5481219089Spjd 5482236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5483219089Spjd 5484236143Smm (void) rw_rdlock(&ztest_name_lock); 5485219089Spjd 5486219089Spjd error = ztest_dataset_create(name); 5487219089Spjd if (error == ENOSPC) { 5488236143Smm (void) rw_unlock(&ztest_name_lock); 5489219089Spjd ztest_record_enospc(FTAG); 5490219089Spjd return (error); 5491168404Spjd } 5492219089Spjd ASSERT(error == 0 || error == EEXIST); 5493168404Spjd 5494248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); 5495236143Smm (void) rw_unlock(&ztest_name_lock); 5496219089Spjd 5497236143Smm ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); 5498219089Spjd 5499219089Spjd zilog = zd->zd_zilog; 5500219089Spjd 5501219089Spjd if (zilog->zl_header->zh_claim_lr_seq != 0 && 5502219089Spjd zilog->zl_header->zh_claim_lr_seq < committed_seq) 5503219089Spjd fatal(0, "missing log records: claimed %llu < committed %llu", 5504219089Spjd zilog->zl_header->zh_claim_lr_seq, committed_seq); 5505219089Spjd 5506219089Spjd ztest_dataset_dirobj_verify(zd); 5507219089Spjd 5508219089Spjd zil_replay(os, zd, ztest_replay_vector); 5509219089Spjd 5510219089Spjd ztest_dataset_dirobj_verify(zd); 5511219089Spjd 5512236143Smm if (ztest_opts.zo_verbose >= 6) 5513219089Spjd (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", 5514219089Spjd zd->zd_name, 5515219089Spjd (u_longlong_t)zilog->zl_parse_blk_count, 5516219089Spjd (u_longlong_t)zilog->zl_parse_lr_count, 5517219089Spjd (u_longlong_t)zilog->zl_replaying_seq); 5518219089Spjd 5519219089Spjd zilog = zil_open(os, ztest_get_data); 5520219089Spjd 5521219089Spjd if (zilog->zl_replaying_seq != 0 && 5522219089Spjd zilog->zl_replaying_seq < committed_seq) 5523219089Spjd fatal(0, "missing log records: replayed %llu < committed %llu", 5524219089Spjd zilog->zl_replaying_seq, committed_seq); 5525219089Spjd 5526219089Spjd return (0); 5527168404Spjd} 5528168404Spjd 5529219089Spjdstatic void 5530236143Smmztest_dataset_close(int d) 5531219089Spjd{ 5532236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5533219089Spjd 5534219089Spjd zil_close(zd->zd_zilog); 5535248571Smm dmu_objset_disown(zd->zd_os, zd); 5536219089Spjd 5537219089Spjd ztest_zd_fini(zd); 5538219089Spjd} 5539219089Spjd 5540168404Spjd/* 5541168404Spjd * Kick off threads to run tests on all datasets in parallel. 5542168404Spjd */ 5543168404Spjdstatic void 5544219089Spjdztest_run(ztest_shared_t *zs) 5545168404Spjd{ 5546219089Spjd thread_t *tid; 5547168404Spjd spa_t *spa; 5548228103Smm objset_t *os; 5549185029Spjd thread_t resume_tid; 5550219089Spjd int error; 5551168404Spjd 5552185029Spjd ztest_exiting = B_FALSE; 5553185029Spjd 5554168404Spjd /* 5555219089Spjd * Initialize parent/child shared state. 5556168404Spjd */ 5557236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5558236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5559168404Spjd 5560219089Spjd zs->zs_thread_start = gethrtime(); 5561236143Smm zs->zs_thread_stop = 5562236143Smm zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; 5563219089Spjd zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); 5564219089Spjd zs->zs_thread_kill = zs->zs_thread_stop; 5565236143Smm if (ztest_random(100) < ztest_opts.zo_killrate) { 5566236143Smm zs->zs_thread_kill -= 5567236143Smm ztest_random(ztest_opts.zo_passtime * NANOSEC); 5568236143Smm } 5569168404Spjd 5570219089Spjd (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); 5571168404Spjd 5572219089Spjd list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), 5573219089Spjd offsetof(ztest_cb_data_t, zcd_node)); 5574168404Spjd 5575168404Spjd /* 5576219089Spjd * Open our pool. 5577168404Spjd */ 5578219089Spjd kernel_init(FREAD | FWRITE); 5579248571Smm VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5580224177Smm spa->spa_debug = B_TRUE; 5581236143Smm ztest_spa = spa; 5582168404Spjd 5583248571Smm VERIFY0(dmu_objset_own(ztest_opts.zo_pool, 5584248571Smm DMU_OST_ANY, B_TRUE, FTAG, &os)); 5585228103Smm zs->zs_guid = dmu_objset_fsid_guid(os); 5586248571Smm dmu_objset_disown(os, FTAG); 5587228103Smm 5588219089Spjd spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; 5589168404Spjd 5590168404Spjd /* 5591209962Smm * We don't expect the pool to suspend unless maxfaults == 0, 5592209962Smm * in which case ztest_fault_inject() temporarily takes away 5593209962Smm * the only valid replica. 5594209962Smm */ 5595219089Spjd if (MAXFAULTS() == 0) 5596209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; 5597209962Smm else 5598209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 5599209962Smm 5600209962Smm /* 5601185029Spjd * Create a thread to periodically resume suspended I/O. 5602185029Spjd */ 5603209962Smm VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, 5604185029Spjd &resume_tid) == 0); 5605185029Spjd 5606185029Spjd /* 5607219089Spjd * Create a deadman thread to abort() if we hang. 5608219089Spjd */ 5609219089Spjd VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, 5610219089Spjd NULL) == 0); 5611219089Spjd 5612219089Spjd /* 5613168404Spjd * Verify that we can safely inquire about about any object, 5614168404Spjd * whether it's allocated or not. To make it interesting, 5615168404Spjd * we probe a 5-wide window around each power of two. 5616168404Spjd * This hits all edge cases, including zero and the max. 5617168404Spjd */ 5618219089Spjd for (int t = 0; t < 64; t++) { 5619219089Spjd for (int d = -5; d <= 5; d++) { 5620168404Spjd error = dmu_object_info(spa->spa_meta_objset, 5621168404Spjd (1ULL << t) + d, NULL); 5622168404Spjd ASSERT(error == 0 || error == ENOENT || 5623168404Spjd error == EINVAL); 5624168404Spjd } 5625168404Spjd } 5626168404Spjd 5627168404Spjd /* 5628219089Spjd * If we got any ENOSPC errors on the previous run, destroy something. 5629168404Spjd */ 5630219089Spjd if (zs->zs_enospc_count != 0) { 5631236143Smm int d = ztest_random(ztest_opts.zo_datasets); 5632236143Smm ztest_dataset_destroy(d); 5633219089Spjd } 5634168404Spjd zs->zs_enospc_count = 0; 5635168404Spjd 5636236143Smm tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), 5637236143Smm UMEM_NOFAIL); 5638168404Spjd 5639236143Smm if (ztest_opts.zo_verbose >= 4) 5640168404Spjd (void) printf("starting main threads...\n"); 5641168404Spjd 5642219089Spjd /* 5643219089Spjd * Kick off all the tests that run in parallel. 5644219089Spjd */ 5645236143Smm for (int t = 0; t < ztest_opts.zo_threads; t++) { 5646236143Smm if (t < ztest_opts.zo_datasets && 5647236143Smm ztest_dataset_open(t) != 0) 5648219089Spjd return; 5649219089Spjd VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, 5650219089Spjd THR_BOUND, &tid[t]) == 0); 5651219089Spjd } 5652168404Spjd 5653219089Spjd /* 5654219089Spjd * Wait for all of the tests to complete. We go in reverse order 5655219089Spjd * so we don't close datasets while threads are still using them. 5656219089Spjd */ 5657236143Smm for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { 5658219089Spjd VERIFY(thr_join(tid[t], NULL, NULL) == 0); 5659236143Smm if (t < ztest_opts.zo_datasets) 5660236143Smm ztest_dataset_close(t); 5661219089Spjd } 5662185029Spjd 5663219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5664185029Spjd 5665219089Spjd zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 5666219089Spjd zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); 5667254112Sdelphij zfs_dbgmsg_print(FTAG); 5668168404Spjd 5669236143Smm umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); 5670168404Spjd 5671219089Spjd /* Kill the resume thread */ 5672219089Spjd ztest_exiting = B_TRUE; 5673219089Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 5674219089Spjd ztest_resume(spa); 5675219089Spjd 5676219089Spjd /* 5677219089Spjd * Right before closing the pool, kick off a bunch of async I/O; 5678219089Spjd * spa_close() should wait for it to complete. 5679219089Spjd */ 5680219089Spjd for (uint64_t object = 1; object < 50; object++) 5681219089Spjd dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); 5682219089Spjd 5683219089Spjd spa_close(spa, FTAG); 5684219089Spjd 5685219089Spjd /* 5686219089Spjd * Verify that we can loop over all pools. 5687219089Spjd */ 5688219089Spjd mutex_enter(&spa_namespace_lock); 5689219089Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) 5690236143Smm if (ztest_opts.zo_verbose > 3) 5691219089Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 5692219089Spjd mutex_exit(&spa_namespace_lock); 5693219089Spjd 5694219089Spjd /* 5695219089Spjd * Verify that we can export the pool and reimport it under a 5696219089Spjd * different name. 5697219089Spjd */ 5698219089Spjd if (ztest_random(2) == 0) { 5699219089Spjd char name[MAXNAMELEN]; 5700236143Smm (void) snprintf(name, MAXNAMELEN, "%s_import", 5701236143Smm ztest_opts.zo_pool); 5702236143Smm ztest_spa_import_export(ztest_opts.zo_pool, name); 5703236143Smm ztest_spa_import_export(name, ztest_opts.zo_pool); 5704168404Spjd } 5705168404Spjd 5706219089Spjd kernel_fini(); 5707219089Spjd 5708219089Spjd list_destroy(&zcl.zcl_callbacks); 5709219089Spjd 5710219089Spjd (void) _mutex_destroy(&zcl.zcl_callbacks_lock); 5711219089Spjd 5712236143Smm (void) rwlock_destroy(&ztest_name_lock); 5713236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5714219089Spjd} 5715219089Spjd 5716219089Spjdstatic void 5717236143Smmztest_freeze(void) 5718219089Spjd{ 5719236143Smm ztest_ds_t *zd = &ztest_ds[0]; 5720219089Spjd spa_t *spa; 5721219089Spjd int numloops = 0; 5722219089Spjd 5723236143Smm if (ztest_opts.zo_verbose >= 3) 5724219089Spjd (void) printf("testing spa_freeze()...\n"); 5725168404Spjd 5726219089Spjd kernel_init(FREAD | FWRITE); 5727236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5728236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5729243524Smm spa->spa_debug = B_TRUE; 5730243524Smm ztest_spa = spa; 5731168404Spjd 5732168404Spjd /* 5733219089Spjd * Force the first log block to be transactionally allocated. 5734219089Spjd * We have to do this before we freeze the pool -- otherwise 5735219089Spjd * the log chain won't be anchored. 5736168404Spjd */ 5737219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 5738219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5739219089Spjd zil_commit(zd->zd_zilog, 0); 5740168404Spjd } 5741168404Spjd 5742168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5743168404Spjd 5744219089Spjd /* 5745219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 5746219089Spjd * so that the only way to record changes from now on is the ZIL. 5747219089Spjd */ 5748219089Spjd spa_freeze(spa); 5749185029Spjd 5750219089Spjd /* 5751219089Spjd * Run tests that generate log records but don't alter the pool config 5752219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 5753219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 5754219089Spjd * to increase well beyond the last synced value in the uberblock. 5755219089Spjd * The ZIL should be OK with that. 5756219089Spjd */ 5757236143Smm while (ztest_random(10) != 0 && 5758236143Smm numloops++ < ztest_opts.zo_maxloops) { 5759219089Spjd ztest_dmu_write_parallel(zd, 0); 5760219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5761219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5762219089Spjd } 5763185029Spjd 5764168404Spjd /* 5765219089Spjd * Commit all of the changes we just generated. 5766168404Spjd */ 5767219089Spjd zil_commit(zd->zd_zilog, 0); 5768219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5769168404Spjd 5770219089Spjd /* 5771219089Spjd * Close our dataset and close the pool. 5772219089Spjd */ 5773236143Smm ztest_dataset_close(0); 5774168404Spjd spa_close(spa, FTAG); 5775219089Spjd kernel_fini(); 5776168404Spjd 5777219089Spjd /* 5778219089Spjd * Open and close the pool and dataset to induce log replay. 5779219089Spjd */ 5780219089Spjd kernel_init(FREAD | FWRITE); 5781236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5782239620Smm ASSERT(spa_freeze_txg(spa) == UINT64_MAX); 5783236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5784236143Smm ztest_dataset_close(0); 5785239620Smm 5786239620Smm spa->spa_debug = B_TRUE; 5787239620Smm ztest_spa = spa; 5788239620Smm txg_wait_synced(spa_get_dsl(spa), 0); 5789239620Smm ztest_reguid(NULL, 0); 5790239620Smm 5791219089Spjd spa_close(spa, FTAG); 5792168404Spjd kernel_fini(); 5793168404Spjd} 5794168404Spjd 5795168404Spjdvoid 5796168404Spjdprint_time(hrtime_t t, char *timebuf) 5797168404Spjd{ 5798168404Spjd hrtime_t s = t / NANOSEC; 5799168404Spjd hrtime_t m = s / 60; 5800168404Spjd hrtime_t h = m / 60; 5801168404Spjd hrtime_t d = h / 24; 5802168404Spjd 5803168404Spjd s -= m * 60; 5804168404Spjd m -= h * 60; 5805168404Spjd h -= d * 24; 5806168404Spjd 5807168404Spjd timebuf[0] = '\0'; 5808168404Spjd 5809168404Spjd if (d) 5810168404Spjd (void) sprintf(timebuf, 5811168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 5812168404Spjd else if (h) 5813168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 5814168404Spjd else if (m) 5815168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 5816168404Spjd else 5817168404Spjd (void) sprintf(timebuf, "%llus", s); 5818168404Spjd} 5819168404Spjd 5820219089Spjdstatic nvlist_t * 5821219089Spjdmake_random_props() 5822219089Spjd{ 5823219089Spjd nvlist_t *props; 5824219089Spjd 5825236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 5826219089Spjd if (ztest_random(2) == 0) 5827236884Smm return (props); 5828219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 5829219089Spjd 5830219089Spjd return (props); 5831219089Spjd} 5832219089Spjd 5833168404Spjd/* 5834168404Spjd * Create a storage pool with the given name and initial vdev size. 5835219089Spjd * Then test spa_freeze() functionality. 5836168404Spjd */ 5837168404Spjdstatic void 5838219089Spjdztest_init(ztest_shared_t *zs) 5839168404Spjd{ 5840168404Spjd spa_t *spa; 5841219089Spjd nvlist_t *nvroot, *props; 5842168404Spjd 5843236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5844236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5845219089Spjd 5846168404Spjd kernel_init(FREAD | FWRITE); 5847168404Spjd 5848168404Spjd /* 5849168404Spjd * Create the storage pool. 5850168404Spjd */ 5851236143Smm (void) spa_destroy(ztest_opts.zo_pool); 5852219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 5853219089Spjd zs->zs_splits = 0; 5854236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 5855243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 5856236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 5857219089Spjd props = make_random_props(); 5858236884Smm for (int i = 0; i < SPA_FEATURES; i++) { 5859236884Smm char buf[1024]; 5860236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 5861236884Smm spa_feature_table[i].fi_uname); 5862236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 5863236884Smm } 5864248571Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); 5865168404Spjd nvlist_free(nvroot); 5866168404Spjd 5867236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5868236143Smm zs->zs_metaslab_sz = 5869236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 5870236884Smm 5871219089Spjd spa_close(spa, FTAG); 5872209962Smm 5873219089Spjd kernel_fini(); 5874168404Spjd 5875236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5876168404Spjd 5877236143Smm ztest_freeze(); 5878219089Spjd 5879236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5880219089Spjd 5881236143Smm (void) rwlock_destroy(&ztest_name_lock); 5882236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5883168404Spjd} 5884168404Spjd 5885236143Smmstatic void 5886242845Sdelphijsetup_data_fd(void) 5887236143Smm{ 5888242845Sdelphij static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; 5889236143Smm 5890242845Sdelphij ztest_fd_data = mkstemp(ztest_name_data); 5891242845Sdelphij ASSERT3S(ztest_fd_data, >=, 0); 5892242845Sdelphij (void) unlink(ztest_name_data); 5893242845Sdelphij} 5894236143Smm 5895236143Smm 5896236884Smmstatic int 5897236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 5898236884Smm{ 5899236884Smm int size; 5900236884Smm 5901236884Smm size = hdr->zh_hdr_size; 5902236884Smm size += hdr->zh_opts_size; 5903236884Smm size += hdr->zh_size; 5904236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 5905236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 5906236884Smm 5907236884Smm return (size); 5908236884Smm} 5909236884Smm 5910236143Smmstatic void 5911236143Smmsetup_hdr(void) 5912236143Smm{ 5913236884Smm int size; 5914236143Smm ztest_shared_hdr_t *hdr; 5915236143Smm 5916236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5917242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5918236143Smm ASSERT(hdr != MAP_FAILED); 5919236143Smm 5920242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); 5921236884Smm 5922236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 5923236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 5924236143Smm hdr->zh_size = sizeof (ztest_shared_t); 5925236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 5926236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 5927236143Smm hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 5928236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 5929236143Smm 5930236884Smm size = shared_data_size(hdr); 5931242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); 5932236884Smm 5933236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5934236143Smm} 5935236143Smm 5936236143Smmstatic void 5937236143Smmsetup_data(void) 5938236143Smm{ 5939236143Smm int size, offset; 5940236143Smm ztest_shared_hdr_t *hdr; 5941236143Smm uint8_t *buf; 5942236143Smm 5943236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5944242845Sdelphij PROT_READ, MAP_SHARED, ztest_fd_data, 0); 5945236143Smm ASSERT(hdr != MAP_FAILED); 5946236143Smm 5947236884Smm size = shared_data_size(hdr); 5948236143Smm 5949236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5950236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 5951242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5952236143Smm ASSERT(hdr != MAP_FAILED); 5953236143Smm buf = (uint8_t *)hdr; 5954236143Smm 5955236143Smm offset = hdr->zh_hdr_size; 5956236143Smm ztest_shared_opts = (void *)&buf[offset]; 5957236143Smm offset += hdr->zh_opts_size; 5958236143Smm ztest_shared = (void *)&buf[offset]; 5959236143Smm offset += hdr->zh_size; 5960236143Smm ztest_shared_callstate = (void *)&buf[offset]; 5961236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 5962236143Smm ztest_shared_ds = (void *)&buf[offset]; 5963236143Smm} 5964236143Smm 5965236143Smmstatic boolean_t 5966236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 5967236143Smm{ 5968236143Smm pid_t pid; 5969236143Smm int status; 5970242845Sdelphij char *cmdbuf = NULL; 5971236143Smm 5972236143Smm pid = fork(); 5973236143Smm 5974236143Smm if (cmd == NULL) { 5975242845Sdelphij cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 5976242845Sdelphij (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); 5977236143Smm cmd = cmdbuf; 5978236143Smm } 5979236143Smm 5980236143Smm if (pid == -1) 5981236143Smm fatal(1, "fork failed"); 5982236143Smm 5983236143Smm if (pid == 0) { /* child */ 5984236143Smm char *emptyargv[2] = { cmd, NULL }; 5985242845Sdelphij char fd_data_str[12]; 5986236143Smm 5987236143Smm struct rlimit rl = { 1024, 1024 }; 5988236143Smm (void) setrlimit(RLIMIT_NOFILE, &rl); 5989242845Sdelphij 5990242845Sdelphij (void) close(ztest_fd_rand); 5991242845Sdelphij VERIFY3U(11, >=, 5992242845Sdelphij snprintf(fd_data_str, 12, "%d", ztest_fd_data)); 5993242845Sdelphij VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); 5994242845Sdelphij 5995236143Smm (void) enable_extended_FILE_stdio(-1, -1); 5996236143Smm if (libpath != NULL) 5997236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 5998236143Smm#ifdef illumos 5999236143Smm (void) execv(cmd, emptyargv); 6000236143Smm#else 6001236143Smm (void) execvp(cmd, emptyargv); 6002236143Smm#endif 6003236143Smm ztest_dump_core = B_FALSE; 6004236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 6005236143Smm } 6006236143Smm 6007242845Sdelphij if (cmdbuf != NULL) { 6008242845Sdelphij umem_free(cmdbuf, MAXPATHLEN); 6009242845Sdelphij cmd = NULL; 6010242845Sdelphij } 6011242845Sdelphij 6012236143Smm while (waitpid(pid, &status, 0) != pid) 6013236143Smm continue; 6014236143Smm if (statusp != NULL) 6015236143Smm *statusp = status; 6016236143Smm 6017236143Smm if (WIFEXITED(status)) { 6018236143Smm if (WEXITSTATUS(status) != 0) { 6019236143Smm (void) fprintf(stderr, "child exited with code %d\n", 6020236143Smm WEXITSTATUS(status)); 6021236143Smm exit(2); 6022236143Smm } 6023236143Smm return (B_FALSE); 6024236143Smm } else if (WIFSIGNALED(status)) { 6025236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 6026236143Smm (void) fprintf(stderr, "child died with signal %d\n", 6027236143Smm WTERMSIG(status)); 6028236143Smm exit(3); 6029236143Smm } 6030236143Smm return (B_TRUE); 6031236143Smm } else { 6032236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 6033236143Smm exit(4); 6034236143Smm /* NOTREACHED */ 6035236143Smm } 6036236143Smm} 6037236143Smm 6038236143Smmstatic void 6039236143Smmztest_run_init(void) 6040236143Smm{ 6041236143Smm ztest_shared_t *zs = ztest_shared; 6042236143Smm 6043236143Smm ASSERT(ztest_opts.zo_init != 0); 6044236143Smm 6045236143Smm /* 6046236143Smm * Blow away any existing copy of zpool.cache 6047236143Smm */ 6048236143Smm (void) remove(spa_config_path); 6049236143Smm 6050236143Smm /* 6051236143Smm * Create and initialize our storage pool. 6052236143Smm */ 6053236143Smm for (int i = 1; i <= ztest_opts.zo_init; i++) { 6054236143Smm bzero(zs, sizeof (ztest_shared_t)); 6055236143Smm if (ztest_opts.zo_verbose >= 3 && 6056236143Smm ztest_opts.zo_init != 1) { 6057236143Smm (void) printf("ztest_init(), pass %d\n", i); 6058236143Smm } 6059236143Smm ztest_init(zs); 6060236143Smm } 6061236143Smm} 6062236143Smm 6063168404Spjdint 6064168404Spjdmain(int argc, char **argv) 6065168404Spjd{ 6066168404Spjd int kills = 0; 6067168404Spjd int iters = 0; 6068236143Smm int older = 0; 6069236143Smm int newer = 0; 6070168404Spjd ztest_shared_t *zs; 6071168404Spjd ztest_info_t *zi; 6072236143Smm ztest_shared_callstate_t *zc; 6073168404Spjd char timebuf[100]; 6074168404Spjd char numbuf[6]; 6075219089Spjd spa_t *spa; 6076242845Sdelphij char *cmd; 6077236143Smm boolean_t hasalt; 6078242845Sdelphij char *fd_data_str = getenv("ZTEST_FD_DATA"); 6079168404Spjd 6080168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 6081168404Spjd 6082240133Smm dprintf_setup(&argc, argv); 6083258632Savg zfs_deadman_synctime_ms = 300000; 6084240133Smm 6085242845Sdelphij ztest_fd_rand = open("/dev/urandom", O_RDONLY); 6086242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 6087242845Sdelphij 6088242845Sdelphij if (!fd_data_str) { 6089236143Smm process_options(argc, argv); 6090168404Spjd 6091242845Sdelphij setup_data_fd(); 6092236143Smm setup_hdr(); 6093236143Smm setup_data(); 6094236143Smm bcopy(&ztest_opts, ztest_shared_opts, 6095236143Smm sizeof (*ztest_shared_opts)); 6096236143Smm } else { 6097242845Sdelphij ztest_fd_data = atoi(fd_data_str); 6098236143Smm setup_data(); 6099236143Smm bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); 6100236143Smm } 6101236143Smm ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); 6102168404Spjd 6103219089Spjd /* Override location of zpool.cache */ 6104242845Sdelphij VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache", 6105242845Sdelphij ztest_opts.zo_dir), !=, -1); 6106219089Spjd 6107236143Smm ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), 6108236143Smm UMEM_NOFAIL); 6109236143Smm zs = ztest_shared; 6110168404Spjd 6111242845Sdelphij if (fd_data_str) { 6112236143Smm metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; 6113236143Smm metaslab_df_alloc_threshold = 6114236143Smm zs->zs_metaslab_df_alloc_threshold; 6115219089Spjd 6116236143Smm if (zs->zs_do_init) 6117236143Smm ztest_run_init(); 6118236143Smm else 6119236143Smm ztest_run(zs); 6120236143Smm exit(0); 6121236143Smm } 6122168404Spjd 6123236143Smm hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); 6124236143Smm 6125236143Smm if (ztest_opts.zo_verbose >= 1) { 6126168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 6127168404Spjd " %llu seconds...\n", 6128236143Smm (u_longlong_t)ztest_opts.zo_vdevs, 6129236143Smm ztest_opts.zo_datasets, 6130236143Smm ztest_opts.zo_threads, 6131236143Smm (u_longlong_t)ztest_opts.zo_time); 6132168404Spjd } 6133168404Spjd 6134242845Sdelphij cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); 6135242845Sdelphij (void) strlcpy(cmd, getexecname(), MAXNAMELEN); 6136236143Smm 6137236143Smm zs->zs_do_init = B_TRUE; 6138236143Smm if (strlen(ztest_opts.zo_alt_ztest) != 0) { 6139236143Smm if (ztest_opts.zo_verbose >= 1) { 6140236143Smm (void) printf("Executing older ztest for " 6141236143Smm "initialization: %s\n", ztest_opts.zo_alt_ztest); 6142236143Smm } 6143236143Smm VERIFY(!exec_child(ztest_opts.zo_alt_ztest, 6144236143Smm ztest_opts.zo_alt_libpath, B_FALSE, NULL)); 6145236143Smm } else { 6146236143Smm VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); 6147168404Spjd } 6148236143Smm zs->zs_do_init = B_FALSE; 6149168404Spjd 6150219089Spjd zs->zs_proc_start = gethrtime(); 6151236143Smm zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; 6152219089Spjd 6153219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6154236143Smm zi = &ztest_info[f]; 6155236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6156219089Spjd if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) 6157236143Smm zc->zc_next = UINT64_MAX; 6158168404Spjd else 6159236143Smm zc->zc_next = zs->zs_proc_start + 6160219089Spjd ztest_random(2 * zi->zi_interval[0] + 1); 6161168404Spjd } 6162168404Spjd 6163168404Spjd /* 6164168404Spjd * Run the tests in a loop. These tests include fault injection 6165168404Spjd * to verify that self-healing data works, and forced crashes 6166168404Spjd * to verify that we never lose on-disk consistency. 6167168404Spjd */ 6168219089Spjd while (gethrtime() < zs->zs_proc_stop) { 6169168404Spjd int status; 6170236143Smm boolean_t killed; 6171168404Spjd 6172168404Spjd /* 6173168404Spjd * Initialize the workload counters for each function. 6174168404Spjd */ 6175219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6176236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6177236143Smm zc->zc_count = 0; 6178236143Smm zc->zc_time = 0; 6179168404Spjd } 6180168404Spjd 6181209962Smm /* Set the allocation switch size */ 6182236143Smm zs->zs_metaslab_df_alloc_threshold = 6183236143Smm ztest_random(zs->zs_metaslab_sz / 4) + 1; 6184209962Smm 6185236143Smm if (!hasalt || ztest_random(2) == 0) { 6186236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6187236143Smm (void) printf("Executing newer ztest: %s\n", 6188236143Smm cmd); 6189168404Spjd } 6190236143Smm newer++; 6191236143Smm killed = exec_child(cmd, NULL, B_TRUE, &status); 6192236143Smm } else { 6193236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6194236143Smm (void) printf("Executing older ztest: %s\n", 6195236143Smm ztest_opts.zo_alt_ztest); 6196168404Spjd } 6197236143Smm older++; 6198236143Smm killed = exec_child(ztest_opts.zo_alt_ztest, 6199236143Smm ztest_opts.zo_alt_libpath, B_TRUE, &status); 6200168404Spjd } 6201168404Spjd 6202236143Smm if (killed) 6203236143Smm kills++; 6204168404Spjd iters++; 6205168404Spjd 6206236143Smm if (ztest_opts.zo_verbose >= 1) { 6207168404Spjd hrtime_t now = gethrtime(); 6208168404Spjd 6209219089Spjd now = MIN(now, zs->zs_proc_stop); 6210219089Spjd print_time(zs->zs_proc_stop - now, timebuf); 6211168404Spjd nicenum(zs->zs_space, numbuf); 6212168404Spjd 6213168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 6214168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 6215168404Spjd iters, 6216168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 6217168404Spjd (u_longlong_t)zs->zs_enospc_count, 6218168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 6219168404Spjd numbuf, 6220219089Spjd 100.0 * (now - zs->zs_proc_start) / 6221236143Smm (ztest_opts.zo_time * NANOSEC), timebuf); 6222168404Spjd } 6223168404Spjd 6224236143Smm if (ztest_opts.zo_verbose >= 2) { 6225168404Spjd (void) printf("\nWorkload summary:\n\n"); 6226168404Spjd (void) printf("%7s %9s %s\n", 6227168404Spjd "Calls", "Time", "Function"); 6228168404Spjd (void) printf("%7s %9s %s\n", 6229168404Spjd "-----", "----", "--------"); 6230219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6231168404Spjd Dl_info dli; 6232168404Spjd 6233236143Smm zi = &ztest_info[f]; 6234236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6235236143Smm print_time(zc->zc_time, timebuf); 6236168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 6237168404Spjd (void) printf("%7llu %9s %s\n", 6238236143Smm (u_longlong_t)zc->zc_count, timebuf, 6239168404Spjd dli.dli_sname); 6240168404Spjd } 6241168404Spjd (void) printf("\n"); 6242168404Spjd } 6243168404Spjd 6244168404Spjd /* 6245219089Spjd * It's possible that we killed a child during a rename test, 6246219089Spjd * in which case we'll have a 'ztest_tmp' pool lying around 6247219089Spjd * instead of 'ztest'. Do a blind rename in case this happened. 6248168404Spjd */ 6249219089Spjd kernel_init(FREAD); 6250236143Smm if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { 6251219089Spjd spa_close(spa, FTAG); 6252219089Spjd } else { 6253219089Spjd char tmpname[MAXNAMELEN]; 6254219089Spjd kernel_fini(); 6255219089Spjd kernel_init(FREAD | FWRITE); 6256219089Spjd (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", 6257236143Smm ztest_opts.zo_pool); 6258236143Smm (void) spa_rename(tmpname, ztest_opts.zo_pool); 6259219089Spjd } 6260168404Spjd kernel_fini(); 6261219089Spjd 6262236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6263168404Spjd } 6264168404Spjd 6265236143Smm if (ztest_opts.zo_verbose >= 1) { 6266236143Smm if (hasalt) { 6267236143Smm (void) printf("%d runs of older ztest: %s\n", older, 6268236143Smm ztest_opts.zo_alt_ztest); 6269236143Smm (void) printf("%d runs of newer ztest: %s\n", newer, 6270236143Smm cmd); 6271236143Smm } 6272168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 6273168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 6274168404Spjd } 6275168404Spjd 6276242845Sdelphij umem_free(cmd, MAXNAMELEN); 6277242845Sdelphij 6278168404Spjd return (0); 6279168404Spjd} 6280