ztest.c revision 286705
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23284304Savg * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 24228103Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 25236143Smm * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 26251646Sdelphij * Copyright (c) 2013 Steven Hartland. All rights reserved. 27168404Spjd */ 28168404Spjd 29168404Spjd/* 30168404Spjd * The objective of this program is to provide a DMU/ZAP/SPA stress test 31168404Spjd * that runs entirely in userland, is easy to use, and easy to extend. 32168404Spjd * 33168404Spjd * The overall design of the ztest program is as follows: 34168404Spjd * 35168404Spjd * (1) For each major functional area (e.g. 
adding vdevs to a pool, 36168404Spjd * creating and destroying datasets, reading and writing objects, etc) 37168404Spjd * we have a simple routine to test that functionality. These 38168404Spjd * individual routines do not have to do anything "stressful". 39168404Spjd * 40168404Spjd * (2) We turn these simple functionality tests into a stress test by 41168404Spjd * running them all in parallel, with as many threads as desired, 42168404Spjd * and spread across as many datasets, objects, and vdevs as desired. 43168404Spjd * 44168404Spjd * (3) While all this is happening, we inject faults into the pool to 45168404Spjd * verify that self-healing data really works. 46168404Spjd * 47168404Spjd * (4) Every time we open a dataset, we change its checksum and compression 48168404Spjd * functions. Thus even individual objects vary from block to block 49168404Spjd * in which checksum they use and whether they're compressed. 50168404Spjd * 51168404Spjd * (5) To verify that we never lose on-disk consistency after a crash, 52168404Spjd * we run the entire test in a child of the main process. 53168404Spjd * At random times, the child self-immolates with a SIGKILL. 54168404Spjd * This is the software equivalent of pulling the power cord. 55168404Spjd * The parent then runs the test again, using the existing 56268075Sdelphij * storage pool, as many times as desired. If backwards compatibility 57236143Smm * testing is enabled ztest will sometimes run the "older" version 58236143Smm * of ztest after a SIGKILL. 59168404Spjd * 60168404Spjd * (6) To verify that we don't have future leaks or temporal incursions, 61168404Spjd * many of the functional tests record the transaction group number 62168404Spjd * as part of their data. When reading old data, they verify that 63168404Spjd * the transaction group number is less than the current, open txg. 64168404Spjd * If you add a new test, please do this if applicable. 
65168404Spjd * 66168404Spjd * When run with no arguments, ztest runs for about five minutes and 67168404Spjd * produces no output if successful. To get a little bit of information, 68168404Spjd * specify -V. To get more information, specify -VV, and so on. 69168404Spjd * 70168404Spjd * To turn this into an overnight stress test, use -T to specify run time. 71168404Spjd * 72168404Spjd * You can ask for more vdevs [-v], datasets [-d], or threads [-t] 73168404Spjd * to increase the pool capacity, fanout, and overall stress level. 74168404Spjd * 75236143Smm * Use the -k option to set the desired frequency of kills. 76236143Smm * 77236143Smm * When ztest invokes itself it passes all relevant information through a 78236143Smm * temporary file which is mmap-ed in the child process. This allows shared 79236143Smm * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always 80236143Smm * stored at offset 0 of this file and contains information on the size and 81236143Smm * number of shared structures in the file. The information stored in this file 82236143Smm * must remain backwards compatible with older versions of ztest so that 83236143Smm * ztest can invoke them during backwards compatibility testing (-B). 
84168404Spjd */ 85168404Spjd 86168404Spjd#include <sys/zfs_context.h> 87168404Spjd#include <sys/spa.h> 88168404Spjd#include <sys/dmu.h> 89168404Spjd#include <sys/txg.h> 90209962Smm#include <sys/dbuf.h> 91168404Spjd#include <sys/zap.h> 92168404Spjd#include <sys/dmu_objset.h> 93168404Spjd#include <sys/poll.h> 94168404Spjd#include <sys/stat.h> 95168404Spjd#include <sys/time.h> 96168404Spjd#include <sys/wait.h> 97168404Spjd#include <sys/mman.h> 98168404Spjd#include <sys/resource.h> 99168404Spjd#include <sys/zio.h> 100168404Spjd#include <sys/zil.h> 101219089Spjd#include <sys/zil_impl.h> 102168404Spjd#include <sys/vdev_impl.h> 103185029Spjd#include <sys/vdev_file.h> 104168404Spjd#include <sys/spa_impl.h> 105219089Spjd#include <sys/metaslab_impl.h> 106168404Spjd#include <sys/dsl_prop.h> 107207910Smm#include <sys/dsl_dataset.h> 108248571Smm#include <sys/dsl_destroy.h> 109219089Spjd#include <sys/dsl_scan.h> 110219089Spjd#include <sys/zio_checksum.h> 111168404Spjd#include <sys/refcount.h> 112236884Smm#include <sys/zfeature.h> 113248571Smm#include <sys/dsl_userhold.h> 114168404Spjd#include <stdio.h> 115168404Spjd#include <stdio_ext.h> 116168404Spjd#include <stdlib.h> 117168404Spjd#include <unistd.h> 118168404Spjd#include <signal.h> 119168404Spjd#include <umem.h> 120168404Spjd#include <dlfcn.h> 121168404Spjd#include <ctype.h> 122168404Spjd#include <math.h> 123168404Spjd#include <errno.h> 124168404Spjd#include <sys/fs/zfs.h> 125219089Spjd#include <libnvpair.h> 126168404Spjd 127242845Sdelphijstatic int ztest_fd_data = -1; 128242845Sdelphijstatic int ztest_fd_rand = -1; 129168404Spjd 130236143Smmtypedef struct ztest_shared_hdr { 131236143Smm uint64_t zh_hdr_size; 132236143Smm uint64_t zh_opts_size; 133236143Smm uint64_t zh_size; 134236143Smm uint64_t zh_stats_size; 135236143Smm uint64_t zh_stats_count; 136236143Smm uint64_t zh_ds_size; 137236143Smm uint64_t zh_ds_count; 138236143Smm} ztest_shared_hdr_t; 139168404Spjd 140236143Smmstatic ztest_shared_hdr_t *ztest_shared_hdr; 
141236143Smm 142236143Smmtypedef struct ztest_shared_opts { 143236143Smm char zo_pool[MAXNAMELEN]; 144236143Smm char zo_dir[MAXNAMELEN]; 145236143Smm char zo_alt_ztest[MAXNAMELEN]; 146236143Smm char zo_alt_libpath[MAXNAMELEN]; 147236143Smm uint64_t zo_vdevs; 148236143Smm uint64_t zo_vdevtime; 149236143Smm size_t zo_vdev_size; 150236143Smm int zo_ashift; 151236143Smm int zo_mirrors; 152236143Smm int zo_raidz; 153236143Smm int zo_raidz_parity; 154236143Smm int zo_datasets; 155236143Smm int zo_threads; 156236143Smm uint64_t zo_passtime; 157236143Smm uint64_t zo_killrate; 158236143Smm int zo_verbose; 159236143Smm int zo_init; 160236143Smm uint64_t zo_time; 161236143Smm uint64_t zo_maxloops; 162236143Smm uint64_t zo_metaslab_gang_bang; 163236143Smm} ztest_shared_opts_t; 164236143Smm 165236143Smmstatic const ztest_shared_opts_t ztest_opts_defaults = { 166236143Smm .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, 167236143Smm .zo_dir = { '/', 't', 'm', 'p', '\0' }, 168236143Smm .zo_alt_ztest = { '\0' }, 169236143Smm .zo_alt_libpath = { '\0' }, 170236143Smm .zo_vdevs = 5, 171236143Smm .zo_ashift = SPA_MINBLOCKSHIFT, 172236143Smm .zo_mirrors = 2, 173236143Smm .zo_raidz = 4, 174236143Smm .zo_raidz_parity = 1, 175269430Sdelphij .zo_vdev_size = SPA_MINDEVSIZE * 2, 176236143Smm .zo_datasets = 7, 177236143Smm .zo_threads = 23, 178236143Smm .zo_passtime = 60, /* 60 seconds */ 179236143Smm .zo_killrate = 70, /* 70% kill rate */ 180236143Smm .zo_verbose = 0, 181236143Smm .zo_init = 1, 182236143Smm .zo_time = 300, /* 5 minutes */ 183236143Smm .zo_maxloops = 50, /* max loops during spa_freeze() */ 184236143Smm .zo_metaslab_gang_bang = 32 << 10 185236143Smm}; 186236143Smm 187236143Smmextern uint64_t metaslab_gang_bang; 188236143Smmextern uint64_t metaslab_df_alloc_threshold; 189258632Savgextern uint64_t zfs_deadman_synctime_ms; 190268086Sdelphijextern int metaslab_preload_limit; 191236143Smm 192236143Smmstatic ztest_shared_opts_t *ztest_shared_opts; 193236143Smmstatic ztest_shared_opts_t 
ztest_opts; 194236143Smm 195236143Smmtypedef struct ztest_shared_ds { 196236143Smm uint64_t zd_seq; 197236143Smm} ztest_shared_ds_t; 198236143Smm 199236143Smmstatic ztest_shared_ds_t *ztest_shared_ds; 200236143Smm#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) 201236143Smm 202219089Spjd#define BT_MAGIC 0x123456789abcdefULL 203236143Smm#define MAXFAULTS() \ 204236143Smm (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) 205219089Spjd 206219089Spjdenum ztest_io_type { 207219089Spjd ZTEST_IO_WRITE_TAG, 208219089Spjd ZTEST_IO_WRITE_PATTERN, 209219089Spjd ZTEST_IO_WRITE_ZEROES, 210219089Spjd ZTEST_IO_TRUNCATE, 211219089Spjd ZTEST_IO_SETATTR, 212243524Smm ZTEST_IO_REWRITE, 213219089Spjd ZTEST_IO_TYPES 214219089Spjd}; 215219089Spjd 216185029Spjdtypedef struct ztest_block_tag { 217219089Spjd uint64_t bt_magic; 218185029Spjd uint64_t bt_objset; 219185029Spjd uint64_t bt_object; 220185029Spjd uint64_t bt_offset; 221219089Spjd uint64_t bt_gen; 222185029Spjd uint64_t bt_txg; 223219089Spjd uint64_t bt_crtxg; 224185029Spjd} ztest_block_tag_t; 225185029Spjd 226219089Spjdtypedef struct bufwad { 227219089Spjd uint64_t bw_index; 228219089Spjd uint64_t bw_txg; 229219089Spjd uint64_t bw_data; 230219089Spjd} bufwad_t; 231168404Spjd 232219089Spjd/* 233219089Spjd * XXX -- fix zfs range locks to be generic so we can use them here. 
234219089Spjd */ 235219089Spjdtypedef enum { 236219089Spjd RL_READER, 237219089Spjd RL_WRITER, 238219089Spjd RL_APPEND 239219089Spjd} rl_type_t; 240168404Spjd 241219089Spjdtypedef struct rll { 242219089Spjd void *rll_writer; 243219089Spjd int rll_readers; 244219089Spjd mutex_t rll_lock; 245219089Spjd cond_t rll_cv; 246219089Spjd} rll_t; 247219089Spjd 248219089Spjdtypedef struct rl { 249219089Spjd uint64_t rl_object; 250219089Spjd uint64_t rl_offset; 251219089Spjd uint64_t rl_size; 252219089Spjd rll_t *rl_lock; 253219089Spjd} rl_t; 254219089Spjd 255219089Spjd#define ZTEST_RANGE_LOCKS 64 256219089Spjd#define ZTEST_OBJECT_LOCKS 64 257219089Spjd 258168404Spjd/* 259219089Spjd * Object descriptor. Used as a template for object lookup/create/remove. 260219089Spjd */ 261219089Spjdtypedef struct ztest_od { 262219089Spjd uint64_t od_dir; 263219089Spjd uint64_t od_object; 264219089Spjd dmu_object_type_t od_type; 265219089Spjd dmu_object_type_t od_crtype; 266219089Spjd uint64_t od_blocksize; 267219089Spjd uint64_t od_crblocksize; 268219089Spjd uint64_t od_gen; 269219089Spjd uint64_t od_crgen; 270219089Spjd char od_name[MAXNAMELEN]; 271219089Spjd} ztest_od_t; 272219089Spjd 273219089Spjd/* 274219089Spjd * Per-dataset state. 275219089Spjd */ 276219089Spjdtypedef struct ztest_ds { 277236143Smm ztest_shared_ds_t *zd_shared; 278219089Spjd objset_t *zd_os; 279224526Smm rwlock_t zd_zilog_lock; 280219089Spjd zilog_t *zd_zilog; 281219089Spjd ztest_od_t *zd_od; /* debugging aid */ 282219089Spjd char zd_name[MAXNAMELEN]; 283219089Spjd mutex_t zd_dirobj_lock; 284219089Spjd rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; 285219089Spjd rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; 286219089Spjd} ztest_ds_t; 287219089Spjd 288219089Spjd/* 289219089Spjd * Per-iteration state. 
290219089Spjd */ 291219089Spjdtypedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); 292219089Spjd 293219089Spjdtypedef struct ztest_info { 294219089Spjd ztest_func_t *zi_func; /* test function */ 295219089Spjd uint64_t zi_iters; /* iterations per execution */ 296219089Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 297219089Spjd} ztest_info_t; 298219089Spjd 299236143Smmtypedef struct ztest_shared_callstate { 300236143Smm uint64_t zc_count; /* per-pass count */ 301236143Smm uint64_t zc_time; /* per-pass time */ 302236143Smm uint64_t zc_next; /* next time to call this function */ 303236143Smm} ztest_shared_callstate_t; 304236143Smm 305236143Smmstatic ztest_shared_callstate_t *ztest_shared_callstate; 306236143Smm#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) 307236143Smm 308219089Spjd/* 309168404Spjd * Note: these aren't static because we want dladdr() to work. 310168404Spjd */ 311168404Spjdztest_func_t ztest_dmu_read_write; 312168404Spjdztest_func_t ztest_dmu_write_parallel; 313168404Spjdztest_func_t ztest_dmu_object_alloc_free; 314219089Spjdztest_func_t ztest_dmu_commit_callbacks; 315168404Spjdztest_func_t ztest_zap; 316168404Spjdztest_func_t ztest_zap_parallel; 317219089Spjdztest_func_t ztest_zil_commit; 318224526Smmztest_func_t ztest_zil_remount; 319219089Spjdztest_func_t ztest_dmu_read_write_zcopy; 320168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 321219089Spjdztest_func_t ztest_dmu_prealloc; 322219089Spjdztest_func_t ztest_fzap; 323168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 324219089Spjdztest_func_t ztest_dsl_prop_get_set; 325219089Spjdztest_func_t ztest_spa_prop_get_set; 326168404Spjdztest_func_t ztest_spa_create_destroy; 327168404Spjdztest_func_t ztest_fault_inject; 328219089Spjdztest_func_t ztest_ddt_repair; 329219089Spjdztest_func_t ztest_dmu_snapshot_hold; 330185029Spjdztest_func_t ztest_spa_rename; 331219089Spjdztest_func_t ztest_scrub; 332219089Spjdztest_func_t 
ztest_dsl_dataset_promote_busy; 333168404Spjdztest_func_t ztest_vdev_attach_detach; 334168404Spjdztest_func_t ztest_vdev_LUN_growth; 335168404Spjdztest_func_t ztest_vdev_add_remove; 336185029Spjdztest_func_t ztest_vdev_aux_add_remove; 337219089Spjdztest_func_t ztest_split_pool; 338228103Smmztest_func_t ztest_reguid; 339243505Smmztest_func_t ztest_spa_upgrade; 340168404Spjd 341219089Spjduint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ 342219089Spjduint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ 343219089Spjduint64_t zopt_often = 1ULL * NANOSEC; /* every second */ 344219089Spjduint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ 345219089Spjduint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ 346168404Spjd 347168404Spjdztest_info_t ztest_info[] = { 348185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 349219089Spjd { ztest_dmu_write_parallel, 10, &zopt_always }, 350185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 351219089Spjd { ztest_dmu_commit_callbacks, 1, &zopt_always }, 352185029Spjd { ztest_zap, 30, &zopt_always }, 353185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 354219089Spjd { ztest_split_pool, 1, &zopt_always }, 355219089Spjd { ztest_zil_commit, 1, &zopt_incessant }, 356224526Smm { ztest_zil_remount, 1, &zopt_sometimes }, 357219089Spjd { ztest_dmu_read_write_zcopy, 1, &zopt_often }, 358219089Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_often }, 359219089Spjd { ztest_dsl_prop_get_set, 1, &zopt_often }, 360219089Spjd { ztest_spa_prop_get_set, 1, &zopt_sometimes }, 361219089Spjd#if 0 362219089Spjd { ztest_dmu_prealloc, 1, &zopt_sometimes }, 363219089Spjd#endif 364219089Spjd { ztest_fzap, 1, &zopt_sometimes }, 365219089Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 366219089Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 367185029Spjd { ztest_fault_inject, 1, &zopt_sometimes }, 368219089Spjd { ztest_ddt_repair, 1, &zopt_sometimes }, 369219089Spjd { 
ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 370254074Sdelphij { ztest_reguid, 1, &zopt_rarely }, 371185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 372219089Spjd { ztest_scrub, 1, &zopt_rarely }, 373243505Smm { ztest_spa_upgrade, 1, &zopt_rarely }, 374219089Spjd { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, 375248571Smm { ztest_vdev_attach_detach, 1, &zopt_sometimes }, 376185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 377236143Smm { ztest_vdev_add_remove, 1, 378236143Smm &ztest_opts.zo_vdevtime }, 379236143Smm { ztest_vdev_aux_add_remove, 1, 380236143Smm &ztest_opts.zo_vdevtime }, 381168404Spjd}; 382168404Spjd 383168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 384168404Spjd 385219089Spjd/* 386219089Spjd * The following struct is used to hold a list of uncalled commit callbacks. 387219089Spjd * The callbacks are ordered by txg number. 388219089Spjd */ 389219089Spjdtypedef struct ztest_cb_list { 390219089Spjd mutex_t zcl_callbacks_lock; 391219089Spjd list_t zcl_callbacks; 392219089Spjd} ztest_cb_list_t; 393168404Spjd 394168404Spjd/* 395168404Spjd * Stuff we need to share writably between parent and child. 
396168404Spjd */ 397168404Spjdtypedef struct ztest_shared { 398236143Smm boolean_t zs_do_init; 399219089Spjd hrtime_t zs_proc_start; 400219089Spjd hrtime_t zs_proc_stop; 401219089Spjd hrtime_t zs_thread_start; 402219089Spjd hrtime_t zs_thread_stop; 403219089Spjd hrtime_t zs_thread_kill; 404219089Spjd uint64_t zs_enospc_count; 405219089Spjd uint64_t zs_vdev_next_leaf; 406185029Spjd uint64_t zs_vdev_aux; 407168404Spjd uint64_t zs_alloc; 408168404Spjd uint64_t zs_space; 409219089Spjd uint64_t zs_splits; 410219089Spjd uint64_t zs_mirrors; 411236143Smm uint64_t zs_metaslab_sz; 412236143Smm uint64_t zs_metaslab_df_alloc_threshold; 413236143Smm uint64_t zs_guid; 414168404Spjd} ztest_shared_t; 415168404Spjd 416219089Spjd#define ID_PARALLEL -1ULL 417219089Spjd 418168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 419185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 420219089Spjdztest_shared_t *ztest_shared; 421168404Spjd 422236143Smmstatic spa_t *ztest_spa = NULL; 423236143Smmstatic ztest_ds_t *ztest_ds; 424168404Spjd 425236143Smmstatic mutex_t ztest_vdev_lock; 426239620Smm 427239620Smm/* 428239620Smm * The ztest_name_lock protects the pool and dataset namespace used by 429239620Smm * the individual tests. To modify the namespace, consumers must grab 430239620Smm * this lock as writer. Grabbing the lock as reader will ensure that the 431239620Smm * namespace does not change while the lock is held. 
432239620Smm */ 433236143Smmstatic rwlock_t ztest_name_lock; 434236143Smm 435236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 436185029Spjdstatic boolean_t ztest_exiting; 437168404Spjd 438219089Spjd/* Global commit callback list */ 439219089Spjdstatic ztest_cb_list_t zcl; 440219089Spjd 441219089Spjdenum ztest_object { 442219089Spjd ZTEST_META_DNODE = 0, 443219089Spjd ZTEST_DIROBJ, 444219089Spjd ZTEST_OBJECTS 445219089Spjd}; 446168404Spjd 447168676Spjdstatic void usage(boolean_t) __NORETURN; 448168498Spjd 449168404Spjd/* 450168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 451168404Spjd * debugging facilities. 452168404Spjd */ 453168404Spjdconst char * 454168404Spjd_umem_debug_init() 455168404Spjd{ 456168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 457168404Spjd} 458168404Spjd 459168404Spjdconst char * 460168404Spjd_umem_logging_init(void) 461168404Spjd{ 462168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 463168404Spjd} 464168404Spjd 465168404Spjd#define FATAL_MSG_SZ 1024 466168404Spjd 467168404Spjdchar *fatal_msg; 468168404Spjd 469168404Spjdstatic void 470168404Spjdfatal(int do_perror, char *message, ...) 
471168404Spjd{ 472168404Spjd va_list args; 473168404Spjd int save_errno = errno; 474168404Spjd char buf[FATAL_MSG_SZ]; 475168404Spjd 476168404Spjd (void) fflush(stdout); 477168404Spjd 478168404Spjd va_start(args, message); 479168404Spjd (void) sprintf(buf, "ztest: "); 480168404Spjd /* LINTED */ 481168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 482168404Spjd va_end(args); 483168404Spjd if (do_perror) { 484168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 485168404Spjd ": %s", strerror(save_errno)); 486168404Spjd } 487168404Spjd (void) fprintf(stderr, "%s\n", buf); 488168404Spjd fatal_msg = buf; /* to ease debugging */ 489168404Spjd if (ztest_dump_core) 490168404Spjd abort(); 491168404Spjd exit(3); 492168404Spjd} 493168404Spjd 494168404Spjdstatic int 495168404Spjdstr2shift(const char *buf) 496168404Spjd{ 497168404Spjd const char *ends = "BKMGTPEZ"; 498168404Spjd int i; 499168404Spjd 500168404Spjd if (buf[0] == '\0') 501168404Spjd return (0); 502168404Spjd for (i = 0; i < strlen(ends); i++) { 503168404Spjd if (toupper(buf[0]) == ends[i]) 504168404Spjd break; 505168404Spjd } 506168498Spjd if (i == strlen(ends)) { 507168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 508168498Spjd buf); 509168498Spjd usage(B_FALSE); 510168498Spjd } 511168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 512168404Spjd return (10*i); 513168404Spjd } 514168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 515168498Spjd usage(B_FALSE); 516168498Spjd /* NOTREACHED */ 517168404Spjd} 518168404Spjd 519168404Spjdstatic uint64_t 520168404Spjdnicenumtoull(const char *buf) 521168404Spjd{ 522168404Spjd char *end; 523168404Spjd uint64_t val; 524168404Spjd 525168404Spjd val = strtoull(buf, &end, 0); 526168404Spjd if (end == buf) { 527168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 528168498Spjd usage(B_FALSE); 529168404Spjd } else if (end[0] == '.') { 530168404Spjd 
double fval = strtod(buf, &end); 531168404Spjd fval *= pow(2, str2shift(end)); 532168498Spjd if (fval > UINT64_MAX) { 533168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 534168498Spjd buf); 535168498Spjd usage(B_FALSE); 536168498Spjd } 537168404Spjd val = (uint64_t)fval; 538168404Spjd } else { 539168404Spjd int shift = str2shift(end); 540168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 541168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 542168498Spjd buf); 543168498Spjd usage(B_FALSE); 544168498Spjd } 545168404Spjd val <<= shift; 546168404Spjd } 547168404Spjd return (val); 548168404Spjd} 549168404Spjd 550168404Spjdstatic void 551168498Spjdusage(boolean_t requested) 552168404Spjd{ 553236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 554236143Smm 555168404Spjd char nice_vdev_size[10]; 556168404Spjd char nice_gang_bang[10]; 557168498Spjd FILE *fp = requested ? stdout : stderr; 558168404Spjd 559236143Smm nicenum(zo->zo_vdev_size, nice_vdev_size); 560236143Smm nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); 561168404Spjd 562168498Spjd (void) fprintf(fp, "Usage: %s\n" 563168404Spjd "\t[-v vdevs (default: %llu)]\n" 564168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 565219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 566168404Spjd "\t[-m mirror_copies (default: %d)]\n" 567168404Spjd "\t[-r raidz_disks (default: %d)]\n" 568168404Spjd "\t[-R raidz_parity (default: %d)]\n" 569168404Spjd "\t[-d datasets (default: %d)]\n" 570168404Spjd "\t[-t threads (default: %d)]\n" 571168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 572219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 573219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 574168404Spjd "\t[-p pool_name (default: %s)]\n" 575219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 576219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 577219089Spjd "\t[-E] use existing 
pool instead of creating new one\n" 578219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 579219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 580219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 581236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 582168498Spjd "\t[-h] (print help)\n" 583168404Spjd "", 584236143Smm zo->zo_pool, 585236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 586185029Spjd nice_vdev_size, /* -s */ 587236143Smm zo->zo_ashift, /* -a */ 588236143Smm zo->zo_mirrors, /* -m */ 589236143Smm zo->zo_raidz, /* -r */ 590236143Smm zo->zo_raidz_parity, /* -R */ 591236143Smm zo->zo_datasets, /* -d */ 592236143Smm zo->zo_threads, /* -t */ 593185029Spjd nice_gang_bang, /* -g */ 594236143Smm zo->zo_init, /* -i */ 595236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 596236143Smm zo->zo_pool, /* -p */ 597236143Smm zo->zo_dir, /* -f */ 598236143Smm (u_longlong_t)zo->zo_time, /* -T */ 599236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 600236143Smm (u_longlong_t)zo->zo_passtime); 601168498Spjd exit(requested ? 
0 : 1); 602168404Spjd} 603168404Spjd 604168404Spjdstatic void 605168404Spjdprocess_options(int argc, char **argv) 606168404Spjd{ 607236143Smm char *path; 608236143Smm ztest_shared_opts_t *zo = &ztest_opts; 609236143Smm 610168404Spjd int opt; 611168404Spjd uint64_t value; 612236143Smm char altdir[MAXNAMELEN] = { 0 }; 613168404Spjd 614236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 615168404Spjd 616168404Spjd while ((opt = getopt(argc, argv, 617236143Smm "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { 618168404Spjd value = 0; 619168404Spjd switch (opt) { 620185029Spjd case 'v': 621185029Spjd case 's': 622185029Spjd case 'a': 623185029Spjd case 'm': 624185029Spjd case 'r': 625185029Spjd case 'R': 626185029Spjd case 'd': 627185029Spjd case 't': 628185029Spjd case 'g': 629185029Spjd case 'i': 630185029Spjd case 'k': 631185029Spjd case 'T': 632185029Spjd case 'P': 633219089Spjd case 'F': 634168404Spjd value = nicenumtoull(optarg); 635168404Spjd } 636168404Spjd switch (opt) { 637185029Spjd case 'v': 638236143Smm zo->zo_vdevs = value; 639168404Spjd break; 640185029Spjd case 's': 641236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 642168404Spjd break; 643185029Spjd case 'a': 644236143Smm zo->zo_ashift = value; 645168404Spjd break; 646185029Spjd case 'm': 647236143Smm zo->zo_mirrors = value; 648168404Spjd break; 649185029Spjd case 'r': 650236143Smm zo->zo_raidz = MAX(1, value); 651168404Spjd break; 652185029Spjd case 'R': 653236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 654168404Spjd break; 655185029Spjd case 'd': 656236143Smm zo->zo_datasets = MAX(1, value); 657168404Spjd break; 658185029Spjd case 't': 659236143Smm zo->zo_threads = MAX(1, value); 660168404Spjd break; 661185029Spjd case 'g': 662236143Smm zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, 663236143Smm value); 664168404Spjd break; 665185029Spjd case 'i': 666236143Smm zo->zo_init = value; 667168404Spjd break; 668185029Spjd case 'k': 669236143Smm zo->zo_killrate = value; 
670168404Spjd break; 671185029Spjd case 'p': 672236143Smm (void) strlcpy(zo->zo_pool, optarg, 673236143Smm sizeof (zo->zo_pool)); 674168404Spjd break; 675185029Spjd case 'f': 676236143Smm path = realpath(optarg, NULL); 677236143Smm if (path == NULL) { 678236143Smm (void) fprintf(stderr, "error: %s: %s\n", 679236143Smm optarg, strerror(errno)); 680236143Smm usage(B_FALSE); 681236143Smm } else { 682236143Smm (void) strlcpy(zo->zo_dir, path, 683236143Smm sizeof (zo->zo_dir)); 684236143Smm } 685168404Spjd break; 686185029Spjd case 'V': 687236143Smm zo->zo_verbose++; 688168404Spjd break; 689185029Spjd case 'E': 690236143Smm zo->zo_init = 0; 691168404Spjd break; 692185029Spjd case 'T': 693236143Smm zo->zo_time = value; 694168404Spjd break; 695185029Spjd case 'P': 696236143Smm zo->zo_passtime = MAX(1, value); 697168404Spjd break; 698219089Spjd case 'F': 699236143Smm zo->zo_maxloops = MAX(1, value); 700219089Spjd break; 701236143Smm case 'B': 702236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 703236143Smm break; 704185029Spjd case 'h': 705168498Spjd usage(B_TRUE); 706168498Spjd break; 707185029Spjd case '?': 708185029Spjd default: 709168498Spjd usage(B_FALSE); 710168404Spjd break; 711168404Spjd } 712168404Spjd } 713168404Spjd 714236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 715168404Spjd 716236143Smm zo->zo_vdevtime = 717236143Smm (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : 718219089Spjd UINT64_MAX >> 2); 719236143Smm 720236143Smm if (strlen(altdir) > 0) { 721242845Sdelphij char *cmd; 722242845Sdelphij char *realaltdir; 723236143Smm char *bin; 724236143Smm char *ztest; 725236143Smm char *isa; 726236143Smm int isalen; 727236143Smm 728242845Sdelphij cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 729242845Sdelphij realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 730242845Sdelphij 731242845Sdelphij VERIFY(NULL != realpath(getexecname(), cmd)); 732236143Smm if (0 != access(altdir, F_OK)) { 733236143Smm ztest_dump_core = B_FALSE; 734236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 735236143Smm altdir); 736236143Smm } 737236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 738236143Smm 739236143Smm /* 740236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 741236143Smm * We want to extract <isa> to determine if we should use 742236143Smm * 32 or 64 bit binaries. 743236143Smm */ 744236143Smm bin = strstr(cmd, "/usr/bin/"); 745236143Smm ztest = strstr(bin, "/ztest"); 746236143Smm isa = bin + 9; 747236143Smm isalen = ztest - isa; 748236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 749236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 750236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 751236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 752236143Smm 753236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 754236143Smm ztest_dump_core = B_FALSE; 755236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 756236143Smm zo->zo_alt_ztest); 757236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 758236143Smm ztest_dump_core = B_FALSE; 759236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 760236143Smm zo->zo_alt_libpath); 761236143Smm } 762242845Sdelphij 763242845Sdelphij umem_free(cmd, MAXPATHLEN); 764242845Sdelphij umem_free(realaltdir, MAXPATHLEN); 765236143Smm } 766168404Spjd} 767168404Spjd 
768219089Spjdstatic void 769219089Spjdztest_kill(ztest_shared_t *zs) 770219089Spjd{ 771236143Smm zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); 772236143Smm zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); 773254112Sdelphij 774254112Sdelphij /* 775254112Sdelphij * Before we kill off ztest, make sure that the config is updated. 776254112Sdelphij * See comment above spa_config_sync(). 777254112Sdelphij */ 778254112Sdelphij mutex_enter(&spa_namespace_lock); 779254112Sdelphij spa_config_sync(ztest_spa, B_FALSE, B_FALSE); 780254112Sdelphij mutex_exit(&spa_namespace_lock); 781254112Sdelphij 782254112Sdelphij zfs_dbgmsg_print(FTAG); 783219089Spjd (void) kill(getpid(), SIGKILL); 784219089Spjd} 785219089Spjd 786168404Spjdstatic uint64_t 787219089Spjdztest_random(uint64_t range) 788219089Spjd{ 789219089Spjd uint64_t r; 790219089Spjd 791242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 792242845Sdelphij 793219089Spjd if (range == 0) 794219089Spjd return (0); 795219089Spjd 796242845Sdelphij if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) 797219089Spjd fatal(1, "short read from /dev/urandom"); 798219089Spjd 799219089Spjd return (r % range); 800219089Spjd} 801219089Spjd 802219089Spjd/* ARGSUSED */ 803219089Spjdstatic void 804219089Spjdztest_record_enospc(const char *s) 805219089Spjd{ 806219089Spjd ztest_shared->zs_enospc_count++; 807219089Spjd} 808219089Spjd 809219089Spjdstatic uint64_t 810168404Spjdztest_get_ashift(void) 811168404Spjd{ 812236143Smm if (ztest_opts.zo_ashift == 0) 813268855Sdelphij return (SPA_MINBLOCKSHIFT + ztest_random(5)); 814236143Smm return (ztest_opts.zo_ashift); 815168404Spjd} 816168404Spjd 817168404Spjdstatic nvlist_t * 818243505Smmmake_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) 819168404Spjd{ 820185029Spjd char pathbuf[MAXPATHLEN]; 821168404Spjd uint64_t vdev; 822168404Spjd nvlist_t *file; 823168404Spjd 824185029Spjd if (ashift == 0) 825185029Spjd ashift = 
ztest_get_ashift(); 826168404Spjd 827185029Spjd if (path == NULL) { 828185029Spjd path = pathbuf; 829185029Spjd 830185029Spjd if (aux != NULL) { 831185029Spjd vdev = ztest_shared->zs_vdev_aux; 832236143Smm (void) snprintf(path, sizeof (pathbuf), 833236143Smm ztest_aux_template, ztest_opts.zo_dir, 834243505Smm pool == NULL ? ztest_opts.zo_pool : pool, 835243505Smm aux, vdev); 836185029Spjd } else { 837219089Spjd vdev = ztest_shared->zs_vdev_next_leaf++; 838236143Smm (void) snprintf(path, sizeof (pathbuf), 839236143Smm ztest_dev_template, ztest_opts.zo_dir, 840243505Smm pool == NULL ? ztest_opts.zo_pool : pool, vdev); 841185029Spjd } 842185029Spjd } 843185029Spjd 844185029Spjd if (size != 0) { 845185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 846168404Spjd if (fd == -1) 847185029Spjd fatal(1, "can't open %s", path); 848168404Spjd if (ftruncate(fd, size) != 0) 849185029Spjd fatal(1, "can't ftruncate %s", path); 850168404Spjd (void) close(fd); 851168404Spjd } 852168404Spjd 853168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 854168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 855185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 856168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 857168404Spjd 858168404Spjd return (file); 859168404Spjd} 860168404Spjd 861168404Spjdstatic nvlist_t * 862243505Smmmake_vdev_raidz(char *path, char *aux, char *pool, size_t size, 863243505Smm uint64_t ashift, int r) 864168404Spjd{ 865168404Spjd nvlist_t *raidz, **child; 866168404Spjd int c; 867168404Spjd 868168404Spjd if (r < 2) 869243505Smm return (make_vdev_file(path, aux, pool, size, ashift)); 870168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 871168404Spjd 872168404Spjd for (c = 0; c < r; c++) 873243505Smm child[c] = make_vdev_file(path, aux, pool, size, ashift); 874168404Spjd 875168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 
876168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 877168404Spjd VDEV_TYPE_RAIDZ) == 0); 878168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 879236143Smm ztest_opts.zo_raidz_parity) == 0); 880168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 881168404Spjd child, r) == 0); 882168404Spjd 883168404Spjd for (c = 0; c < r; c++) 884168404Spjd nvlist_free(child[c]); 885168404Spjd 886168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 887168404Spjd 888168404Spjd return (raidz); 889168404Spjd} 890168404Spjd 891168404Spjdstatic nvlist_t * 892243505Smmmake_vdev_mirror(char *path, char *aux, char *pool, size_t size, 893243505Smm uint64_t ashift, int r, int m) 894168404Spjd{ 895168404Spjd nvlist_t *mirror, **child; 896168404Spjd int c; 897168404Spjd 898168404Spjd if (m < 1) 899243505Smm return (make_vdev_raidz(path, aux, pool, size, ashift, r)); 900168404Spjd 901168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 902168404Spjd 903168404Spjd for (c = 0; c < m; c++) 904243505Smm child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); 905168404Spjd 906168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 907168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 908168404Spjd VDEV_TYPE_MIRROR) == 0); 909168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 910168404Spjd child, m) == 0); 911168404Spjd 912168404Spjd for (c = 0; c < m; c++) 913168404Spjd nvlist_free(child[c]); 914168404Spjd 915168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 916168404Spjd 917168404Spjd return (mirror); 918168404Spjd} 919168404Spjd 920168404Spjdstatic nvlist_t * 921243505Smmmake_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, 922243505Smm int log, int r, int m, int t) 923168404Spjd{ 924168404Spjd nvlist_t *root, **child; 925168404Spjd int c; 926168404Spjd 927168404Spjd ASSERT(t > 0); 928168404Spjd 929168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), 
UMEM_NOFAIL); 930168404Spjd 931185029Spjd for (c = 0; c < t; c++) { 932243505Smm child[c] = make_vdev_mirror(path, aux, pool, size, ashift, 933243505Smm r, m); 934185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 935185029Spjd log) == 0); 936185029Spjd } 937168404Spjd 938168404Spjd VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); 939168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 940185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, 941168404Spjd child, t) == 0); 942168404Spjd 943168404Spjd for (c = 0; c < t; c++) 944168404Spjd nvlist_free(child[c]); 945168404Spjd 946168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 947168404Spjd 948168404Spjd return (root); 949168404Spjd} 950168404Spjd 951243505Smm/* 952243505Smm * Find a random spa version. Returns back a random spa version in the 953243505Smm * range [initial_version, SPA_VERSION_FEATURES]. 954243505Smm */ 955243505Smmstatic uint64_t 956243505Smmztest_random_spa_version(uint64_t initial_version) 957243505Smm{ 958243505Smm uint64_t version = initial_version; 959243505Smm 960243505Smm if (version <= SPA_VERSION_BEFORE_FEATURES) { 961243505Smm version = version + 962243505Smm ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); 963243505Smm } 964243505Smm 965243505Smm if (version > SPA_VERSION_BEFORE_FEATURES) 966243505Smm version = SPA_VERSION_FEATURES; 967243505Smm 968243505Smm ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 969243505Smm return (version); 970243505Smm} 971243505Smm 972219089Spjdstatic int 973219089Spjdztest_random_blocksize(void) 974219089Spjd{ 975274337Sdelphij uint64_t block_shift; 976274337Sdelphij /* 977274337Sdelphij * Choose a block size >= the ashift. 978274337Sdelphij * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks. 
979274337Sdelphij */ 980274337Sdelphij int maxbs = SPA_OLD_MAXBLOCKSHIFT; 981274337Sdelphij if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE) 982274337Sdelphij maxbs = 20; 983284304Savg block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1); 984268855Sdelphij return (1 << (SPA_MINBLOCKSHIFT + block_shift)); 985219089Spjd} 986219089Spjd 987219089Spjdstatic int 988219089Spjdztest_random_ibshift(void) 989219089Spjd{ 990219089Spjd return (DN_MIN_INDBLKSHIFT + 991219089Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); 992219089Spjd} 993219089Spjd 994219089Spjdstatic uint64_t 995219089Spjdztest_random_vdev_top(spa_t *spa, boolean_t log_ok) 996219089Spjd{ 997219089Spjd uint64_t top; 998219089Spjd vdev_t *rvd = spa->spa_root_vdev; 999219089Spjd vdev_t *tvd; 1000219089Spjd 1001219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 1002219089Spjd 1003219089Spjd do { 1004219089Spjd top = ztest_random(rvd->vdev_children); 1005219089Spjd tvd = rvd->vdev_child[top]; 1006219089Spjd } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || 1007219089Spjd tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); 1008219089Spjd 1009219089Spjd return (top); 1010219089Spjd} 1011219089Spjd 1012219089Spjdstatic uint64_t 1013219089Spjdztest_random_dsl_prop(zfs_prop_t prop) 1014219089Spjd{ 1015219089Spjd uint64_t value; 1016219089Spjd 1017219089Spjd do { 1018219089Spjd value = zfs_prop_random_value(prop, ztest_random(-1ULL)); 1019219089Spjd } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); 1020219089Spjd 1021219089Spjd return (value); 1022219089Spjd} 1023219089Spjd 1024219089Spjdstatic int 1025219089Spjdztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, 1026219089Spjd boolean_t inherit) 1027219089Spjd{ 1028219089Spjd const char *propname = zfs_prop_to_name(prop); 1029219089Spjd const char *valname; 1030219089Spjd char setpoint[MAXPATHLEN]; 1031219089Spjd uint64_t curval; 1032219089Spjd int error; 
1033219089Spjd 1034248571Smm error = dsl_prop_set_int(osname, propname, 1035248571Smm (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); 1036219089Spjd 1037219089Spjd if (error == ENOSPC) { 1038219089Spjd ztest_record_enospc(FTAG); 1039219089Spjd return (error); 1040219089Spjd } 1041240415Smm ASSERT0(error); 1042219089Spjd 1043248571Smm VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); 1044219089Spjd 1045236143Smm if (ztest_opts.zo_verbose >= 6) { 1046219089Spjd VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); 1047219089Spjd (void) printf("%s %s = %s at '%s'\n", 1048219089Spjd osname, propname, valname, setpoint); 1049219089Spjd } 1050219089Spjd 1051219089Spjd return (error); 1052219089Spjd} 1053219089Spjd 1054219089Spjdstatic int 1055236143Smmztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 1056219089Spjd{ 1057236143Smm spa_t *spa = ztest_spa; 1058219089Spjd nvlist_t *props = NULL; 1059219089Spjd int error; 1060219089Spjd 1061219089Spjd VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 1062219089Spjd VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); 1063219089Spjd 1064219089Spjd error = spa_prop_set(spa, props); 1065219089Spjd 1066219089Spjd nvlist_free(props); 1067219089Spjd 1068219089Spjd if (error == ENOSPC) { 1069219089Spjd ztest_record_enospc(FTAG); 1070219089Spjd return (error); 1071219089Spjd } 1072240415Smm ASSERT0(error); 1073219089Spjd 1074219089Spjd return (error); 1075219089Spjd} 1076219089Spjd 1077168404Spjdstatic void 1078219089Spjdztest_rll_init(rll_t *rll) 1079168404Spjd{ 1080219089Spjd rll->rll_writer = NULL; 1081219089Spjd rll->rll_readers = 0; 1082219089Spjd VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); 1083219089Spjd VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); 1084219089Spjd} 1085219089Spjd 1086219089Spjdstatic void 1087219089Spjdztest_rll_destroy(rll_t *rll) 1088219089Spjd{ 1089219089Spjd ASSERT(rll->rll_writer == NULL); 1090219089Spjd 
ASSERT(rll->rll_readers == 0); 1091219089Spjd VERIFY(_mutex_destroy(&rll->rll_lock) == 0); 1092219089Spjd VERIFY(cond_destroy(&rll->rll_cv) == 0); 1093219089Spjd} 1094219089Spjd 1095219089Spjdstatic void 1096219089Spjdztest_rll_lock(rll_t *rll, rl_type_t type) 1097219089Spjd{ 1098219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1099219089Spjd 1100219089Spjd if (type == RL_READER) { 1101219089Spjd while (rll->rll_writer != NULL) 1102219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1103219089Spjd rll->rll_readers++; 1104219089Spjd } else { 1105219089Spjd while (rll->rll_writer != NULL || rll->rll_readers) 1106219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1107219089Spjd rll->rll_writer = curthread; 1108219089Spjd } 1109219089Spjd 1110219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1111219089Spjd} 1112219089Spjd 1113219089Spjdstatic void 1114219089Spjdztest_rll_unlock(rll_t *rll) 1115219089Spjd{ 1116219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1117219089Spjd 1118219089Spjd if (rll->rll_writer) { 1119219089Spjd ASSERT(rll->rll_readers == 0); 1120219089Spjd rll->rll_writer = NULL; 1121219089Spjd } else { 1122219089Spjd ASSERT(rll->rll_readers != 0); 1123219089Spjd ASSERT(rll->rll_writer == NULL); 1124219089Spjd rll->rll_readers--; 1125219089Spjd } 1126219089Spjd 1127219089Spjd if (rll->rll_writer == NULL && rll->rll_readers == 0) 1128219089Spjd VERIFY(cond_broadcast(&rll->rll_cv) == 0); 1129219089Spjd 1130219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1131219089Spjd} 1132219089Spjd 1133219089Spjdstatic void 1134219089Spjdztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) 1135219089Spjd{ 1136219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1137219089Spjd 1138219089Spjd ztest_rll_lock(rll, type); 1139219089Spjd} 1140219089Spjd 1141219089Spjdstatic void 1142219089Spjdztest_object_unlock(ztest_ds_t *zd, uint64_t object) 1143219089Spjd{ 1144219089Spjd rll_t *rll = 
&zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1145219089Spjd 1146219089Spjd ztest_rll_unlock(rll); 1147219089Spjd} 1148219089Spjd 1149219089Spjdstatic rl_t * 1150219089Spjdztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, 1151219089Spjd uint64_t size, rl_type_t type) 1152219089Spjd{ 1153219089Spjd uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); 1154219089Spjd rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; 1155219089Spjd rl_t *rl; 1156219089Spjd 1157219089Spjd rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); 1158219089Spjd rl->rl_object = object; 1159219089Spjd rl->rl_offset = offset; 1160219089Spjd rl->rl_size = size; 1161219089Spjd rl->rl_lock = rll; 1162219089Spjd 1163219089Spjd ztest_rll_lock(rll, type); 1164219089Spjd 1165219089Spjd return (rl); 1166219089Spjd} 1167219089Spjd 1168219089Spjdstatic void 1169219089Spjdztest_range_unlock(rl_t *rl) 1170219089Spjd{ 1171219089Spjd rll_t *rll = rl->rl_lock; 1172219089Spjd 1173219089Spjd ztest_rll_unlock(rll); 1174219089Spjd 1175219089Spjd umem_free(rl, sizeof (*rl)); 1176219089Spjd} 1177219089Spjd 1178219089Spjdstatic void 1179236143Smmztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) 1180219089Spjd{ 1181219089Spjd zd->zd_os = os; 1182219089Spjd zd->zd_zilog = dmu_objset_zil(os); 1183236143Smm zd->zd_shared = szd; 1184219089Spjd dmu_objset_name(os, zd->zd_name); 1185219089Spjd 1186236143Smm if (zd->zd_shared != NULL) 1187236143Smm zd->zd_shared->zd_seq = 0; 1188236143Smm 1189224526Smm VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); 1190219089Spjd VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); 1191219089Spjd 1192219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1193219089Spjd ztest_rll_init(&zd->zd_object_lock[l]); 1194219089Spjd 1195219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1196219089Spjd ztest_rll_init(&zd->zd_range_lock[l]); 1197219089Spjd} 1198219089Spjd 1199219089Spjdstatic void 
1200219089Spjdztest_zd_fini(ztest_ds_t *zd) 1201219089Spjd{ 1202219089Spjd VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0); 1203219089Spjd 1204219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1205219089Spjd ztest_rll_destroy(&zd->zd_object_lock[l]); 1206219089Spjd 1207219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1208219089Spjd ztest_rll_destroy(&zd->zd_range_lock[l]); 1209219089Spjd} 1210219089Spjd 1211219089Spjd#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) 1212219089Spjd 1213219089Spjdstatic uint64_t 1214219089Spjdztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) 1215219089Spjd{ 1216219089Spjd uint64_t txg; 1217168404Spjd int error; 1218168404Spjd 1219219089Spjd /* 1220219089Spjd * Attempt to assign tx to some transaction group. 1221219089Spjd */ 1222219089Spjd error = dmu_tx_assign(tx, txg_how); 1223168404Spjd if (error) { 1224219089Spjd if (error == ERESTART) { 1225219089Spjd ASSERT(txg_how == TXG_NOWAIT); 1226219089Spjd dmu_tx_wait(tx); 1227219089Spjd } else { 1228219089Spjd ASSERT3U(error, ==, ENOSPC); 1229219089Spjd ztest_record_enospc(tag); 1230219089Spjd } 1231219089Spjd dmu_tx_abort(tx); 1232219089Spjd return (0); 1233168404Spjd } 1234219089Spjd txg = dmu_tx_get_txg(tx); 1235219089Spjd ASSERT(txg != 0); 1236219089Spjd return (txg); 1237168404Spjd} 1238168404Spjd 1239219089Spjdstatic void 1240219089Spjdztest_pattern_set(void *buf, uint64_t size, uint64_t value) 1241168404Spjd{ 1242219089Spjd uint64_t *ip = buf; 1243219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1244168404Spjd 1245219089Spjd while (ip < ip_end) 1246219089Spjd *ip++ = value; 1247219089Spjd} 1248168404Spjd 1249219089Spjdstatic boolean_t 1250219089Spjdztest_pattern_match(void *buf, uint64_t size, uint64_t value) 1251219089Spjd{ 1252219089Spjd uint64_t *ip = buf; 1253219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1254219089Spjd uint64_t diff = 0; 1255168404Spjd 1256219089Spjd 
while (ip < ip_end) 1257219089Spjd diff |= (value - *ip++); 1258219089Spjd 1259219089Spjd return (diff == 0); 1260168404Spjd} 1261168404Spjd 1262219089Spjdstatic void 1263219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1264219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1265168404Spjd{ 1266219089Spjd bt->bt_magic = BT_MAGIC; 1267219089Spjd bt->bt_objset = dmu_objset_id(os); 1268219089Spjd bt->bt_object = object; 1269219089Spjd bt->bt_offset = offset; 1270219089Spjd bt->bt_gen = gen; 1271219089Spjd bt->bt_txg = txg; 1272219089Spjd bt->bt_crtxg = crtxg; 1273168404Spjd} 1274168404Spjd 1275219089Spjdstatic void 1276219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1277219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1278219089Spjd{ 1279268075Sdelphij ASSERT3U(bt->bt_magic, ==, BT_MAGIC); 1280268075Sdelphij ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); 1281268075Sdelphij ASSERT3U(bt->bt_object, ==, object); 1282268075Sdelphij ASSERT3U(bt->bt_offset, ==, offset); 1283268075Sdelphij ASSERT3U(bt->bt_gen, <=, gen); 1284268075Sdelphij ASSERT3U(bt->bt_txg, <=, txg); 1285268075Sdelphij ASSERT3U(bt->bt_crtxg, ==, crtxg); 1286219089Spjd} 1287219089Spjd 1288219089Spjdstatic ztest_block_tag_t * 1289219089Spjdztest_bt_bonus(dmu_buf_t *db) 1290219089Spjd{ 1291219089Spjd dmu_object_info_t doi; 1292219089Spjd ztest_block_tag_t *bt; 1293219089Spjd 1294219089Spjd dmu_object_info_from_db(db, &doi); 1295219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1296219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1297219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1298219089Spjd 1299219089Spjd return (bt); 1300219089Spjd} 1301219089Spjd 1302219089Spjd/* 1303219089Spjd * ZIL logging ops 1304219089Spjd */ 1305219089Spjd 1306219089Spjd#define lrz_type lr_mode 1307219089Spjd#define lrz_blocksize lr_uid 1308219089Spjd#define lrz_ibshift 
lr_gid 1309219089Spjd#define lrz_bonustype lr_rdev 1310219089Spjd#define lrz_bonuslen lr_crtime[1] 1311219089Spjd 1312219089Spjdstatic void 1313219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1314219089Spjd{ 1315219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1316219089Spjd size_t namesize = strlen(name) + 1; 1317219089Spjd itx_t *itx; 1318219089Spjd 1319219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1320219089Spjd return; 1321219089Spjd 1322219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1323219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1324219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1325219089Spjd 1326219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1327219089Spjd} 1328219089Spjd 1329219089Spjdstatic void 1330219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1331219089Spjd{ 1332219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1333219089Spjd size_t namesize = strlen(name) + 1; 1334219089Spjd itx_t *itx; 1335219089Spjd 1336219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1337219089Spjd return; 1338219089Spjd 1339219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1340219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1341219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1342219089Spjd 1343219089Spjd itx->itx_oid = object; 1344219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1345219089Spjd} 1346219089Spjd 1347219089Spjdstatic void 1348219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1349219089Spjd{ 1350219089Spjd itx_t *itx; 1351219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1352219089Spjd 1353219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1354219089Spjd return; 1355219089Spjd 1356219089Spjd if (lr->lr_length > ZIL_MAX_LOG_DATA) 1357219089Spjd write_state = WR_INDIRECT; 1358219089Spjd 1359219089Spjd itx = zil_itx_create(TX_WRITE, 1360219089Spjd sizeof (*lr) + 
(write_state == WR_COPIED ? lr->lr_length : 0)); 1361219089Spjd 1362219089Spjd if (write_state == WR_COPIED && 1363219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1364219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1365219089Spjd zil_itx_destroy(itx); 1366219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1367219089Spjd write_state = WR_NEED_COPY; 1368219089Spjd } 1369219089Spjd itx->itx_private = zd; 1370219089Spjd itx->itx_wr_state = write_state; 1371219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1372219089Spjd itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0); 1373219089Spjd 1374219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1375219089Spjd sizeof (*lr) - sizeof (lr_t)); 1376219089Spjd 1377219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1378219089Spjd} 1379219089Spjd 1380219089Spjdstatic void 1381219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1382219089Spjd{ 1383219089Spjd itx_t *itx; 1384219089Spjd 1385219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1386219089Spjd return; 1387219089Spjd 1388219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1389219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1390219089Spjd sizeof (*lr) - sizeof (lr_t)); 1391219089Spjd 1392219089Spjd itx->itx_sync = B_FALSE; 1393219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1394219089Spjd} 1395219089Spjd 1396219089Spjdstatic void 1397219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1398219089Spjd{ 1399219089Spjd itx_t *itx; 1400219089Spjd 1401219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1402219089Spjd return; 1403219089Spjd 1404219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1405219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1406219089Spjd sizeof (*lr) - sizeof (lr_t)); 1407219089Spjd 1408219089Spjd itx->itx_sync = B_FALSE; 1409219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1410219089Spjd} 1411219089Spjd 
1412219089Spjd/* 1413219089Spjd * ZIL replay ops 1414219089Spjd */ 1415168404Spjdstatic int 1416219089Spjdztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) 1417168404Spjd{ 1418219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1419219089Spjd objset_t *os = zd->zd_os; 1420219089Spjd ztest_block_tag_t *bbt; 1421219089Spjd dmu_buf_t *db; 1422168404Spjd dmu_tx_t *tx; 1423219089Spjd uint64_t txg; 1424219089Spjd int error = 0; 1425168404Spjd 1426168404Spjd if (byteswap) 1427168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1428168404Spjd 1429219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1430219089Spjd ASSERT(name[0] != '\0'); 1431219089Spjd 1432168404Spjd tx = dmu_tx_create(os); 1433219089Spjd 1434219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1435219089Spjd 1436219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1437219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1438219089Spjd } else { 1439219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1440219089Spjd } 1441219089Spjd 1442219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1443219089Spjd if (txg == 0) 1444219089Spjd return (ENOSPC); 1445219089Spjd 1446219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1447219089Spjd 1448219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1449219089Spjd if (lr->lr_foid == 0) { 1450219089Spjd lr->lr_foid = zap_create(os, 1451219089Spjd lr->lrz_type, lr->lrz_bonustype, 1452219089Spjd lr->lrz_bonuslen, tx); 1453219089Spjd } else { 1454219089Spjd error = zap_create_claim(os, lr->lr_foid, 1455219089Spjd lr->lrz_type, lr->lrz_bonustype, 1456219089Spjd lr->lrz_bonuslen, tx); 1457219089Spjd } 1458219089Spjd } else { 1459219089Spjd if (lr->lr_foid == 0) { 1460219089Spjd lr->lr_foid = dmu_object_alloc(os, 1461219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1462219089Spjd lr->lrz_bonuslen, tx); 1463219089Spjd } else { 1464219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1465219089Spjd lr->lrz_type, 0, 
lr->lrz_bonustype, 1466219089Spjd lr->lrz_bonuslen, tx); 1467219089Spjd } 1468219089Spjd } 1469219089Spjd 1470168404Spjd if (error) { 1471219089Spjd ASSERT3U(error, ==, EEXIST); 1472219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1473219089Spjd dmu_tx_commit(tx); 1474168404Spjd return (error); 1475168404Spjd } 1476168404Spjd 1477219089Spjd ASSERT(lr->lr_foid != 0); 1478219089Spjd 1479219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1480219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1481219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1482219089Spjd 1483219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1484219089Spjd bbt = ztest_bt_bonus(db); 1485219089Spjd dmu_buf_will_dirty(db, tx); 1486219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1487219089Spjd dmu_buf_rele(db, FTAG); 1488219089Spjd 1489219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1490219089Spjd &lr->lr_foid, tx)); 1491219089Spjd 1492219089Spjd (void) ztest_log_create(zd, tx, lr); 1493219089Spjd 1494168404Spjd dmu_tx_commit(tx); 1495168404Spjd 1496219089Spjd return (0); 1497219089Spjd} 1498219089Spjd 1499219089Spjdstatic int 1500219089Spjdztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) 1501219089Spjd{ 1502219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1503219089Spjd objset_t *os = zd->zd_os; 1504219089Spjd dmu_object_info_t doi; 1505219089Spjd dmu_tx_t *tx; 1506219089Spjd uint64_t object, txg; 1507219089Spjd 1508219089Spjd if (byteswap) 1509219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1510219089Spjd 1511219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1512219089Spjd ASSERT(name[0] != '\0'); 1513219089Spjd 1514219089Spjd VERIFY3U(0, ==, 1515219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); 1516219089Spjd ASSERT(object != 0); 1517219089Spjd 1518219089Spjd ztest_object_lock(zd, object, RL_WRITER); 1519219089Spjd 1520219089Spjd 
VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); 1521219089Spjd 1522219089Spjd tx = dmu_tx_create(os); 1523219089Spjd 1524219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1525219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1526219089Spjd 1527219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1528219089Spjd if (txg == 0) { 1529219089Spjd ztest_object_unlock(zd, object); 1530219089Spjd return (ENOSPC); 1531168404Spjd } 1532168404Spjd 1533219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1534219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1535219089Spjd } else { 1536219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1537219089Spjd } 1538219089Spjd 1539219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1540219089Spjd 1541219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1542219089Spjd 1543219089Spjd dmu_tx_commit(tx); 1544219089Spjd 1545219089Spjd ztest_object_unlock(zd, object); 1546219089Spjd 1547219089Spjd return (0); 1548168404Spjd} 1549168404Spjd 1550168404Spjdstatic int 1551219089Spjdztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) 1552168404Spjd{ 1553219089Spjd objset_t *os = zd->zd_os; 1554219089Spjd void *data = lr + 1; /* data follows lr */ 1555219089Spjd uint64_t offset, length; 1556219089Spjd ztest_block_tag_t *bt = data; 1557219089Spjd ztest_block_tag_t *bbt; 1558219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1559219089Spjd dmu_object_info_t doi; 1560168404Spjd dmu_tx_t *tx; 1561219089Spjd dmu_buf_t *db; 1562219089Spjd arc_buf_t *abuf = NULL; 1563219089Spjd rl_t *rl; 1564168404Spjd 1565168404Spjd if (byteswap) 1566168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1567168404Spjd 1568219089Spjd offset = lr->lr_offset; 1569219089Spjd length = lr->lr_length; 1570219089Spjd 1571219089Spjd /* If it's a dmu_sync() block, write the whole block */ 1572219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1573219089Spjd uint64_t blocksize = 
BP_GET_LSIZE(&lr->lr_blkptr); 1574219089Spjd if (length < blocksize) { 1575219089Spjd offset -= offset % blocksize; 1576219089Spjd length = blocksize; 1577219089Spjd } 1578219089Spjd } 1579219089Spjd 1580219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1581219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1582219089Spjd 1583219089Spjd if (bt->bt_magic != BT_MAGIC) 1584219089Spjd bt = NULL; 1585219089Spjd 1586219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1587219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1588219089Spjd 1589219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1590219089Spjd 1591219089Spjd dmu_object_info_from_db(db, &doi); 1592219089Spjd 1593219089Spjd bbt = ztest_bt_bonus(db); 1594219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1595219089Spjd gen = bbt->bt_gen; 1596219089Spjd crtxg = bbt->bt_crtxg; 1597219089Spjd lrtxg = lr->lr_common.lrc_txg; 1598219089Spjd 1599168404Spjd tx = dmu_tx_create(os); 1600219089Spjd 1601219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1602219089Spjd 1603219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1604219089Spjd P2PHASE(offset, length) == 0) 1605219089Spjd abuf = dmu_request_arcbuf(db, length); 1606219089Spjd 1607219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1608219089Spjd if (txg == 0) { 1609219089Spjd if (abuf != NULL) 1610219089Spjd dmu_return_arcbuf(abuf); 1611219089Spjd dmu_buf_rele(db, FTAG); 1612219089Spjd ztest_range_unlock(rl); 1613219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1614219089Spjd return (ENOSPC); 1615168404Spjd } 1616168404Spjd 1617219089Spjd if (bt != NULL) { 1618219089Spjd /* 1619219089Spjd * Usually, verify the old data before writing new data -- 1620219089Spjd * but not always, because we also want to verify correct 1621219089Spjd * behavior when the data was not recently read into cache. 
1622219089Spjd */ 1623219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1624219089Spjd if (ztest_random(4) != 0) { 1625219089Spjd int prefetch = ztest_random(2) ? 1626219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1627219089Spjd ztest_block_tag_t rbt; 1628219089Spjd 1629219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1630219089Spjd sizeof (rbt), &rbt, prefetch) == 0); 1631219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1632219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1633219089Spjd offset, gen, txg, crtxg); 1634219089Spjd } 1635219089Spjd } 1636219089Spjd 1637219089Spjd /* 1638219089Spjd * Writes can appear to be newer than the bonus buffer because 1639219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1640219089Spjd * open-context data, which may be different than the data 1641219089Spjd * as it was when the write was generated. 1642219089Spjd */ 1643219089Spjd if (zd->zd_zilog->zl_replay) { 1644219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1645219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1646219089Spjd bt->bt_crtxg); 1647219089Spjd } 1648219089Spjd 1649219089Spjd /* 1650219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1651219089Spjd * so that all of the usual ASSERTs will work. 
1652219089Spjd */ 1653219089Spjd ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); 1654219089Spjd } 1655219089Spjd 1656219089Spjd if (abuf == NULL) { 1657219089Spjd dmu_write(os, lr->lr_foid, offset, length, data, tx); 1658219089Spjd } else { 1659219089Spjd bcopy(data, abuf->b_data, length); 1660219089Spjd dmu_assign_arcbuf(db, offset, abuf, tx); 1661219089Spjd } 1662219089Spjd 1663219089Spjd (void) ztest_log_write(zd, tx, lr); 1664219089Spjd 1665219089Spjd dmu_buf_rele(db, FTAG); 1666219089Spjd 1667168404Spjd dmu_tx_commit(tx); 1668168404Spjd 1669219089Spjd ztest_range_unlock(rl); 1670219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1671219089Spjd 1672219089Spjd return (0); 1673168404Spjd} 1674168404Spjd 1675219089Spjdstatic int 1676219089Spjdztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) 1677219089Spjd{ 1678219089Spjd objset_t *os = zd->zd_os; 1679219089Spjd dmu_tx_t *tx; 1680219089Spjd uint64_t txg; 1681219089Spjd rl_t *rl; 1682219089Spjd 1683219089Spjd if (byteswap) 1684219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1685219089Spjd 1686219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1687219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, 1688219089Spjd RL_WRITER); 1689219089Spjd 1690219089Spjd tx = dmu_tx_create(os); 1691219089Spjd 1692219089Spjd dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); 1693219089Spjd 1694219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1695219089Spjd if (txg == 0) { 1696219089Spjd ztest_range_unlock(rl); 1697219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1698219089Spjd return (ENOSPC); 1699219089Spjd } 1700219089Spjd 1701219089Spjd VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, 1702219089Spjd lr->lr_length, tx) == 0); 1703219089Spjd 1704219089Spjd (void) ztest_log_truncate(zd, tx, lr); 1705219089Spjd 1706219089Spjd dmu_tx_commit(tx); 1707219089Spjd 1708219089Spjd ztest_range_unlock(rl); 1709219089Spjd 
ztest_object_unlock(zd, lr->lr_foid); 1710219089Spjd 1711219089Spjd return (0); 1712219089Spjd} 1713219089Spjd 1714219089Spjdstatic int 1715219089Spjdztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) 1716219089Spjd{ 1717219089Spjd objset_t *os = zd->zd_os; 1718219089Spjd dmu_tx_t *tx; 1719219089Spjd dmu_buf_t *db; 1720219089Spjd ztest_block_tag_t *bbt; 1721219089Spjd uint64_t txg, lrtxg, crtxg; 1722219089Spjd 1723219089Spjd if (byteswap) 1724219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1725219089Spjd 1726219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_WRITER); 1727219089Spjd 1728219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1729219089Spjd 1730219089Spjd tx = dmu_tx_create(os); 1731219089Spjd dmu_tx_hold_bonus(tx, lr->lr_foid); 1732219089Spjd 1733219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1734219089Spjd if (txg == 0) { 1735219089Spjd dmu_buf_rele(db, FTAG); 1736219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1737219089Spjd return (ENOSPC); 1738219089Spjd } 1739219089Spjd 1740219089Spjd bbt = ztest_bt_bonus(db); 1741219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1742219089Spjd crtxg = bbt->bt_crtxg; 1743219089Spjd lrtxg = lr->lr_common.lrc_txg; 1744219089Spjd 1745219089Spjd if (zd->zd_zilog->zl_replay) { 1746219089Spjd ASSERT(lr->lr_size != 0); 1747219089Spjd ASSERT(lr->lr_mode != 0); 1748219089Spjd ASSERT(lrtxg != 0); 1749219089Spjd } else { 1750219089Spjd /* 1751219089Spjd * Randomly change the size and increment the generation. 1752219089Spjd */ 1753219089Spjd lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * 1754219089Spjd sizeof (*bbt); 1755219089Spjd lr->lr_mode = bbt->bt_gen + 1; 1756219089Spjd ASSERT(lrtxg == 0); 1757219089Spjd } 1758219089Spjd 1759219089Spjd /* 1760219089Spjd * Verify that the current bonus buffer is not newer than our txg. 
1761219089Spjd */ 1762219089Spjd ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, 1763219089Spjd MAX(txg, lrtxg), crtxg); 1764219089Spjd 1765219089Spjd dmu_buf_will_dirty(db, tx); 1766219089Spjd 1767219089Spjd ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); 1768219089Spjd ASSERT3U(lr->lr_size, <=, db->db_size); 1769240415Smm VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); 1770219089Spjd bbt = ztest_bt_bonus(db); 1771219089Spjd 1772219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); 1773219089Spjd 1774219089Spjd dmu_buf_rele(db, FTAG); 1775219089Spjd 1776219089Spjd (void) ztest_log_setattr(zd, tx, lr); 1777219089Spjd 1778219089Spjd dmu_tx_commit(tx); 1779219089Spjd 1780219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1781219089Spjd 1782219089Spjd return (0); 1783219089Spjd} 1784219089Spjd 1785168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 1786168404Spjd NULL, /* 0 no such transaction type */ 1787168404Spjd ztest_replay_create, /* TX_CREATE */ 1788168404Spjd NULL, /* TX_MKDIR */ 1789168404Spjd NULL, /* TX_MKXATTR */ 1790168404Spjd NULL, /* TX_SYMLINK */ 1791168404Spjd ztest_replay_remove, /* TX_REMOVE */ 1792168404Spjd NULL, /* TX_RMDIR */ 1793168404Spjd NULL, /* TX_LINK */ 1794168404Spjd NULL, /* TX_RENAME */ 1795219089Spjd ztest_replay_write, /* TX_WRITE */ 1796219089Spjd ztest_replay_truncate, /* TX_TRUNCATE */ 1797219089Spjd ztest_replay_setattr, /* TX_SETATTR */ 1798168404Spjd NULL, /* TX_ACL */ 1799209962Smm NULL, /* TX_CREATE_ACL */ 1800209962Smm NULL, /* TX_CREATE_ATTR */ 1801209962Smm NULL, /* TX_CREATE_ACL_ATTR */ 1802209962Smm NULL, /* TX_MKDIR_ACL */ 1803209962Smm NULL, /* TX_MKDIR_ATTR */ 1804209962Smm NULL, /* TX_MKDIR_ACL_ATTR */ 1805209962Smm NULL, /* TX_WRITE2 */ 1806168404Spjd}; 1807168404Spjd 1808168404Spjd/* 1809219089Spjd * ZIL get_data callbacks 1810219089Spjd */ 1811219089Spjd 1812219089Spjdstatic void 1813219089Spjdztest_get_done(zgd_t *zgd, int error) 1814219089Spjd{ 1815219089Spjd ztest_ds_t 
*zd = zgd->zgd_private; 1816219089Spjd uint64_t object = zgd->zgd_rl->rl_object; 1817219089Spjd 1818219089Spjd if (zgd->zgd_db) 1819219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1820219089Spjd 1821219089Spjd ztest_range_unlock(zgd->zgd_rl); 1822219089Spjd ztest_object_unlock(zd, object); 1823219089Spjd 1824219089Spjd if (error == 0 && zgd->zgd_bp) 1825219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1826219089Spjd 1827219089Spjd umem_free(zgd, sizeof (*zgd)); 1828219089Spjd} 1829219089Spjd 1830219089Spjdstatic int 1831219089Spjdztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1832219089Spjd{ 1833219089Spjd ztest_ds_t *zd = arg; 1834219089Spjd objset_t *os = zd->zd_os; 1835219089Spjd uint64_t object = lr->lr_foid; 1836219089Spjd uint64_t offset = lr->lr_offset; 1837219089Spjd uint64_t size = lr->lr_length; 1838219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1839219089Spjd uint64_t txg = lr->lr_common.lrc_txg; 1840219089Spjd uint64_t crtxg; 1841219089Spjd dmu_object_info_t doi; 1842219089Spjd dmu_buf_t *db; 1843219089Spjd zgd_t *zgd; 1844219089Spjd int error; 1845219089Spjd 1846219089Spjd ztest_object_lock(zd, object, RL_READER); 1847219089Spjd error = dmu_bonus_hold(os, object, FTAG, &db); 1848219089Spjd if (error) { 1849219089Spjd ztest_object_unlock(zd, object); 1850219089Spjd return (error); 1851219089Spjd } 1852219089Spjd 1853219089Spjd crtxg = ztest_bt_bonus(db)->bt_crtxg; 1854219089Spjd 1855219089Spjd if (crtxg == 0 || crtxg > txg) { 1856219089Spjd dmu_buf_rele(db, FTAG); 1857219089Spjd ztest_object_unlock(zd, object); 1858219089Spjd return (ENOENT); 1859219089Spjd } 1860219089Spjd 1861219089Spjd dmu_object_info_from_db(db, &doi); 1862219089Spjd dmu_buf_rele(db, FTAG); 1863219089Spjd db = NULL; 1864219089Spjd 1865219089Spjd zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); 1866219089Spjd zgd->zgd_zilog = zd->zd_zilog; 1867219089Spjd zgd->zgd_private = zd; 1868219089Spjd 1869219089Spjd if (buf != NULL) { /* immediate write */ 1870219089Spjd 
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1871219089Spjd RL_READER); 1872219089Spjd 1873219089Spjd error = dmu_read(os, object, offset, size, buf, 1874219089Spjd DMU_READ_NO_PREFETCH); 1875219089Spjd ASSERT(error == 0); 1876219089Spjd } else { 1877219089Spjd size = doi.doi_data_block_size; 1878219089Spjd if (ISP2(size)) { 1879219089Spjd offset = P2ALIGN(offset, size); 1880219089Spjd } else { 1881219089Spjd ASSERT(offset < size); 1882219089Spjd offset = 0; 1883219089Spjd } 1884219089Spjd 1885219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1886219089Spjd RL_READER); 1887219089Spjd 1888219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1889219089Spjd DMU_READ_NO_PREFETCH); 1890219089Spjd 1891219089Spjd if (error == 0) { 1892243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1893243524Smm if (obp) { 1894243524Smm ASSERT(BP_IS_HOLE(bp)); 1895243524Smm *bp = *obp; 1896243524Smm } 1897243524Smm 1898219089Spjd zgd->zgd_db = db; 1899219089Spjd zgd->zgd_bp = bp; 1900219089Spjd 1901219089Spjd ASSERT(db->db_offset == offset); 1902219089Spjd ASSERT(db->db_size == size); 1903219089Spjd 1904219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1905219089Spjd ztest_get_done, zgd); 1906219089Spjd 1907219089Spjd if (error == 0) 1908219089Spjd return (0); 1909219089Spjd } 1910219089Spjd } 1911219089Spjd 1912219089Spjd ztest_get_done(zgd, error); 1913219089Spjd 1914219089Spjd return (error); 1915219089Spjd} 1916219089Spjd 1917219089Spjdstatic void * 1918219089Spjdztest_lr_alloc(size_t lrsize, char *name) 1919219089Spjd{ 1920219089Spjd char *lr; 1921219089Spjd size_t namesize = name ? 
strlen(name) + 1 : 0; 1922219089Spjd 1923219089Spjd lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); 1924219089Spjd 1925219089Spjd if (name) 1926219089Spjd bcopy(name, lr + lrsize, namesize); 1927219089Spjd 1928219089Spjd return (lr); 1929219089Spjd} 1930219089Spjd 1931219089Spjdvoid 1932219089Spjdztest_lr_free(void *lr, size_t lrsize, char *name) 1933219089Spjd{ 1934219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1935219089Spjd 1936219089Spjd umem_free(lr, lrsize + namesize); 1937219089Spjd} 1938219089Spjd 1939219089Spjd/* 1940219089Spjd * Lookup a bunch of objects. Returns the number of objects not found. 1941219089Spjd */ 1942219089Spjdstatic int 1943219089Spjdztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) 1944219089Spjd{ 1945219089Spjd int missing = 0; 1946219089Spjd int error; 1947219089Spjd 1948219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1949219089Spjd 1950219089Spjd for (int i = 0; i < count; i++, od++) { 1951219089Spjd od->od_object = 0; 1952219089Spjd error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, 1953219089Spjd sizeof (uint64_t), 1, &od->od_object); 1954219089Spjd if (error) { 1955219089Spjd ASSERT(error == ENOENT); 1956219089Spjd ASSERT(od->od_object == 0); 1957219089Spjd missing++; 1958219089Spjd } else { 1959219089Spjd dmu_buf_t *db; 1960219089Spjd ztest_block_tag_t *bbt; 1961219089Spjd dmu_object_info_t doi; 1962219089Spjd 1963219089Spjd ASSERT(od->od_object != 0); 1964219089Spjd ASSERT(missing == 0); /* there should be no gaps */ 1965219089Spjd 1966219089Spjd ztest_object_lock(zd, od->od_object, RL_READER); 1967219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, 1968219089Spjd od->od_object, FTAG, &db)); 1969219089Spjd dmu_object_info_from_db(db, &doi); 1970219089Spjd bbt = ztest_bt_bonus(db); 1971219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1972219089Spjd od->od_type = doi.doi_type; 1973219089Spjd od->od_blocksize = doi.doi_data_block_size; 1974219089Spjd od->od_gen = bbt->bt_gen; 1975219089Spjd 
dmu_buf_rele(db, FTAG); 1976219089Spjd ztest_object_unlock(zd, od->od_object); 1977219089Spjd } 1978219089Spjd } 1979219089Spjd 1980219089Spjd return (missing); 1981219089Spjd} 1982219089Spjd 1983219089Spjdstatic int 1984219089Spjdztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) 1985219089Spjd{ 1986219089Spjd int missing = 0; 1987219089Spjd 1988219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1989219089Spjd 1990219089Spjd for (int i = 0; i < count; i++, od++) { 1991219089Spjd if (missing) { 1992219089Spjd od->od_object = 0; 1993219089Spjd missing++; 1994219089Spjd continue; 1995219089Spjd } 1996219089Spjd 1997219089Spjd lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 1998219089Spjd 1999219089Spjd lr->lr_doid = od->od_dir; 2000219089Spjd lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ 2001219089Spjd lr->lrz_type = od->od_crtype; 2002219089Spjd lr->lrz_blocksize = od->od_crblocksize; 2003219089Spjd lr->lrz_ibshift = ztest_random_ibshift(); 2004219089Spjd lr->lrz_bonustype = DMU_OT_UINT64_OTHER; 2005219089Spjd lr->lrz_bonuslen = dmu_bonus_max(); 2006219089Spjd lr->lr_gen = od->od_crgen; 2007219089Spjd lr->lr_crtime[0] = time(NULL); 2008219089Spjd 2009219089Spjd if (ztest_replay_create(zd, lr, B_FALSE) != 0) { 2010219089Spjd ASSERT(missing == 0); 2011219089Spjd od->od_object = 0; 2012219089Spjd missing++; 2013219089Spjd } else { 2014219089Spjd od->od_object = lr->lr_foid; 2015219089Spjd od->od_type = od->od_crtype; 2016219089Spjd od->od_blocksize = od->od_crblocksize; 2017219089Spjd od->od_gen = od->od_crgen; 2018219089Spjd ASSERT(od->od_object != 0); 2019219089Spjd } 2020219089Spjd 2021219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2022219089Spjd } 2023219089Spjd 2024219089Spjd return (missing); 2025219089Spjd} 2026219089Spjd 2027219089Spjdstatic int 2028219089Spjdztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) 2029219089Spjd{ 2030219089Spjd int missing = 0; 2031219089Spjd int error; 2032219089Spjd 2033219089Spjd 
ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 2034219089Spjd 2035219089Spjd od += count - 1; 2036219089Spjd 2037219089Spjd for (int i = count - 1; i >= 0; i--, od--) { 2038219089Spjd if (missing) { 2039219089Spjd missing++; 2040219089Spjd continue; 2041219089Spjd } 2042219089Spjd 2043243524Smm /* 2044243524Smm * No object was found. 2045243524Smm */ 2046219089Spjd if (od->od_object == 0) 2047219089Spjd continue; 2048219089Spjd 2049219089Spjd lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 2050219089Spjd 2051219089Spjd lr->lr_doid = od->od_dir; 2052219089Spjd 2053219089Spjd if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { 2054219089Spjd ASSERT3U(error, ==, ENOSPC); 2055219089Spjd missing++; 2056219089Spjd } else { 2057219089Spjd od->od_object = 0; 2058219089Spjd } 2059219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2060219089Spjd } 2061219089Spjd 2062219089Spjd return (missing); 2063219089Spjd} 2064219089Spjd 2065219089Spjdstatic int 2066219089Spjdztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, 2067219089Spjd void *data) 2068219089Spjd{ 2069219089Spjd lr_write_t *lr; 2070219089Spjd int error; 2071219089Spjd 2072219089Spjd lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); 2073219089Spjd 2074219089Spjd lr->lr_foid = object; 2075219089Spjd lr->lr_offset = offset; 2076219089Spjd lr->lr_length = size; 2077219089Spjd lr->lr_blkoff = 0; 2078219089Spjd BP_ZERO(&lr->lr_blkptr); 2079219089Spjd 2080219089Spjd bcopy(data, lr + 1, size); 2081219089Spjd 2082219089Spjd error = ztest_replay_write(zd, lr, B_FALSE); 2083219089Spjd 2084219089Spjd ztest_lr_free(lr, sizeof (*lr) + size, NULL); 2085219089Spjd 2086219089Spjd return (error); 2087219089Spjd} 2088219089Spjd 2089219089Spjdstatic int 2090219089Spjdztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2091219089Spjd{ 2092219089Spjd lr_truncate_t *lr; 2093219089Spjd int error; 2094219089Spjd 2095219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 
2096219089Spjd 2097219089Spjd lr->lr_foid = object; 2098219089Spjd lr->lr_offset = offset; 2099219089Spjd lr->lr_length = size; 2100219089Spjd 2101219089Spjd error = ztest_replay_truncate(zd, lr, B_FALSE); 2102219089Spjd 2103219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2104219089Spjd 2105219089Spjd return (error); 2106219089Spjd} 2107219089Spjd 2108219089Spjdstatic int 2109219089Spjdztest_setattr(ztest_ds_t *zd, uint64_t object) 2110219089Spjd{ 2111219089Spjd lr_setattr_t *lr; 2112219089Spjd int error; 2113219089Spjd 2114219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2115219089Spjd 2116219089Spjd lr->lr_foid = object; 2117219089Spjd lr->lr_size = 0; 2118219089Spjd lr->lr_mode = 0; 2119219089Spjd 2120219089Spjd error = ztest_replay_setattr(zd, lr, B_FALSE); 2121219089Spjd 2122219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2123219089Spjd 2124219089Spjd return (error); 2125219089Spjd} 2126219089Spjd 2127219089Spjdstatic void 2128219089Spjdztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2129219089Spjd{ 2130219089Spjd objset_t *os = zd->zd_os; 2131219089Spjd dmu_tx_t *tx; 2132219089Spjd uint64_t txg; 2133219089Spjd rl_t *rl; 2134219089Spjd 2135219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 2136219089Spjd 2137219089Spjd ztest_object_lock(zd, object, RL_READER); 2138219089Spjd rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); 2139219089Spjd 2140219089Spjd tx = dmu_tx_create(os); 2141219089Spjd 2142219089Spjd dmu_tx_hold_write(tx, object, offset, size); 2143219089Spjd 2144219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 2145219089Spjd 2146219089Spjd if (txg != 0) { 2147219089Spjd dmu_prealloc(os, object, offset, size, tx); 2148219089Spjd dmu_tx_commit(tx); 2149219089Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2150219089Spjd } else { 2151219089Spjd (void) dmu_free_long_range(os, object, offset, size); 2152219089Spjd } 2153219089Spjd 2154219089Spjd ztest_range_unlock(rl); 2155219089Spjd 
ztest_object_unlock(zd, object); 2156219089Spjd} 2157219089Spjd 2158219089Spjdstatic void 2159219089Spjdztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) 2160219089Spjd{ 2161243524Smm int err; 2162219089Spjd ztest_block_tag_t wbt; 2163219089Spjd dmu_object_info_t doi; 2164219089Spjd enum ztest_io_type io_type; 2165219089Spjd uint64_t blocksize; 2166219089Spjd void *data; 2167219089Spjd 2168219089Spjd VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); 2169219089Spjd blocksize = doi.doi_data_block_size; 2170219089Spjd data = umem_alloc(blocksize, UMEM_NOFAIL); 2171219089Spjd 2172219089Spjd /* 2173219089Spjd * Pick an i/o type at random, biased toward writing block tags. 2174219089Spjd */ 2175219089Spjd io_type = ztest_random(ZTEST_IO_TYPES); 2176219089Spjd if (ztest_random(2) == 0) 2177219089Spjd io_type = ZTEST_IO_WRITE_TAG; 2178219089Spjd 2179224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2180224526Smm 2181219089Spjd switch (io_type) { 2182219089Spjd 2183219089Spjd case ZTEST_IO_WRITE_TAG: 2184219089Spjd ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); 2185219089Spjd (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); 2186219089Spjd break; 2187219089Spjd 2188219089Spjd case ZTEST_IO_WRITE_PATTERN: 2189219089Spjd (void) memset(data, 'a' + (object + offset) % 5, blocksize); 2190219089Spjd if (ztest_random(2) == 0) { 2191219089Spjd /* 2192219089Spjd * Induce fletcher2 collisions to ensure that 2193219089Spjd * zio_ddt_collision() detects and resolves them 2194219089Spjd * when using fletcher2-verify for deduplication. 
2195219089Spjd */ 2196219089Spjd ((uint64_t *)data)[0] ^= 1ULL << 63; 2197219089Spjd ((uint64_t *)data)[4] ^= 1ULL << 63; 2198219089Spjd } 2199219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2200219089Spjd break; 2201219089Spjd 2202219089Spjd case ZTEST_IO_WRITE_ZEROES: 2203219089Spjd bzero(data, blocksize); 2204219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2205219089Spjd break; 2206219089Spjd 2207219089Spjd case ZTEST_IO_TRUNCATE: 2208219089Spjd (void) ztest_truncate(zd, object, offset, blocksize); 2209219089Spjd break; 2210219089Spjd 2211219089Spjd case ZTEST_IO_SETATTR: 2212219089Spjd (void) ztest_setattr(zd, object); 2213219089Spjd break; 2214243524Smm 2215243524Smm case ZTEST_IO_REWRITE: 2216243524Smm (void) rw_rdlock(&ztest_name_lock); 2217243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2218243524Smm ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), 2219243524Smm B_FALSE); 2220243524Smm VERIFY(err == 0 || err == ENOSPC); 2221243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2222243524Smm ZFS_PROP_COMPRESSION, 2223243524Smm ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), 2224243524Smm B_FALSE); 2225243524Smm VERIFY(err == 0 || err == ENOSPC); 2226243524Smm (void) rw_unlock(&ztest_name_lock); 2227243524Smm 2228243524Smm VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, 2229243524Smm DMU_READ_NO_PREFETCH)); 2230243524Smm 2231243524Smm (void) ztest_write(zd, object, offset, blocksize, data); 2232243524Smm break; 2233219089Spjd } 2234219089Spjd 2235224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2236224526Smm 2237219089Spjd umem_free(data, blocksize); 2238219089Spjd} 2239219089Spjd 2240219089Spjd/* 2241219089Spjd * Initialize an object description template. 
2242219089Spjd */ 2243219089Spjdstatic void 2244219089Spjdztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, 2245219089Spjd dmu_object_type_t type, uint64_t blocksize, uint64_t gen) 2246219089Spjd{ 2247219089Spjd od->od_dir = ZTEST_DIROBJ; 2248219089Spjd od->od_object = 0; 2249219089Spjd 2250219089Spjd od->od_crtype = type; 2251219089Spjd od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); 2252219089Spjd od->od_crgen = gen; 2253219089Spjd 2254219089Spjd od->od_type = DMU_OT_NONE; 2255219089Spjd od->od_blocksize = 0; 2256219089Spjd od->od_gen = 0; 2257219089Spjd 2258219089Spjd (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", 2259219089Spjd tag, (int64_t)id, index); 2260219089Spjd} 2261219089Spjd 2262219089Spjd/* 2263219089Spjd * Lookup or create the objects for a test using the od template. 2264219089Spjd * If the objects do not all exist, or if 'remove' is specified, 2265219089Spjd * remove any existing objects and create new ones. Otherwise, 2266219089Spjd * use the existing objects. 
2267219089Spjd */ 2268219089Spjdstatic int 2269219089Spjdztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) 2270219089Spjd{ 2271219089Spjd int count = size / sizeof (*od); 2272219089Spjd int rv = 0; 2273219089Spjd 2274219089Spjd VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); 2275219089Spjd if ((ztest_lookup(zd, od, count) != 0 || remove) && 2276219089Spjd (ztest_remove(zd, od, count) != 0 || 2277219089Spjd ztest_create(zd, od, count) != 0)) 2278219089Spjd rv = -1; 2279219089Spjd zd->zd_od = od; 2280219089Spjd VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2281219089Spjd 2282219089Spjd return (rv); 2283219089Spjd} 2284219089Spjd 2285219089Spjd/* ARGSUSED */ 2286219089Spjdvoid 2287219089Spjdztest_zil_commit(ztest_ds_t *zd, uint64_t id) 2288219089Spjd{ 2289219089Spjd zilog_t *zilog = zd->zd_zilog; 2290219089Spjd 2291224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2292224526Smm 2293219089Spjd zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); 2294219089Spjd 2295219089Spjd /* 2296219089Spjd * Remember the committed values in zd, which is in parent/child 2297219089Spjd * shared memory. If we die, the next iteration of ztest_run() 2298219089Spjd * will verify that the log really does contain this record. 2299219089Spjd */ 2300219089Spjd mutex_enter(&zilog->zl_lock); 2301236143Smm ASSERT(zd->zd_shared != NULL); 2302236143Smm ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); 2303236143Smm zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; 2304219089Spjd mutex_exit(&zilog->zl_lock); 2305224526Smm 2306224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2307219089Spjd} 2308219089Spjd 2309219089Spjd/* 2310224526Smm * This function is designed to simulate the operations that occur during a 2311224526Smm * mount/unmount operation. We hold the dataset across these operations in an 2312224526Smm * attempt to expose any implicit assumptions about ZIL management. 
2313224526Smm */ 2314224526Smm/* ARGSUSED */ 2315224526Smmvoid 2316224526Smmztest_zil_remount(ztest_ds_t *zd, uint64_t id) 2317224526Smm{ 2318224526Smm objset_t *os = zd->zd_os; 2319224526Smm 2320243524Smm /* 2321243524Smm * We grab the zd_dirobj_lock to ensure that no other thread is 2322243524Smm * updating the zil (i.e. adding in-memory log records) and the 2323243524Smm * zd_zilog_lock to block any I/O. 2324243524Smm */ 2325243524Smm VERIFY0(mutex_lock(&zd->zd_dirobj_lock)); 2326224526Smm (void) rw_wrlock(&zd->zd_zilog_lock); 2327224526Smm 2328224526Smm /* zfsvfs_teardown() */ 2329224526Smm zil_close(zd->zd_zilog); 2330224526Smm 2331224526Smm /* zfsvfs_setup() */ 2332224526Smm VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); 2333224526Smm zil_replay(os, zd, ztest_replay_vector); 2334224526Smm 2335224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2336239620Smm VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2337224526Smm} 2338224526Smm 2339224526Smm/* 2340168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 2341168404Spjd * or create a pool with a bad vdev spec. 2342168404Spjd */ 2343219089Spjd/* ARGSUSED */ 2344168404Spjdvoid 2345219089Spjdztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) 2346168404Spjd{ 2347236143Smm ztest_shared_opts_t *zo = &ztest_opts; 2348168404Spjd spa_t *spa; 2349168404Spjd nvlist_t *nvroot; 2350168404Spjd 2351168404Spjd /* 2352168404Spjd * Attempt to create using a bad file. 2353168404Spjd */ 2354243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2355219089Spjd VERIFY3U(ENOENT, ==, 2356248571Smm spa_create("ztest_bad_file", nvroot, NULL, NULL)); 2357168404Spjd nvlist_free(nvroot); 2358168404Spjd 2359168404Spjd /* 2360168404Spjd * Attempt to create using a bad mirror. 
2361168404Spjd */ 2362243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); 2363219089Spjd VERIFY3U(ENOENT, ==, 2364248571Smm spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); 2365168404Spjd nvlist_free(nvroot); 2366168404Spjd 2367168404Spjd /* 2368168404Spjd * Attempt to create an existing pool. It shouldn't matter 2369168404Spjd * what's in the nvroot; we should fail with EEXIST. 2370168404Spjd */ 2371236143Smm (void) rw_rdlock(&ztest_name_lock); 2372243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2373248571Smm VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); 2374168404Spjd nvlist_free(nvroot); 2375236143Smm VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); 2376236143Smm VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); 2377219089Spjd spa_close(spa, FTAG); 2378168404Spjd 2379236143Smm (void) rw_unlock(&ztest_name_lock); 2380168404Spjd} 2381168404Spjd 2382243505Smm/* ARGSUSED */ 2383243505Smmvoid 2384243505Smmztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) 2385243505Smm{ 2386243505Smm spa_t *spa; 2387243505Smm uint64_t initial_version = SPA_VERSION_INITIAL; 2388243505Smm uint64_t version, newversion; 2389243505Smm nvlist_t *nvroot, *props; 2390243505Smm char *name; 2391243505Smm 2392243505Smm VERIFY0(mutex_lock(&ztest_vdev_lock)); 2393243505Smm name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); 2394243505Smm 2395243505Smm /* 2396243505Smm * Clean up from previous runs. 2397243505Smm */ 2398243505Smm (void) spa_destroy(name); 2399243505Smm 2400243505Smm nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, 2401243505Smm 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); 2402243505Smm 2403243505Smm /* 2404243505Smm * If we're configuring a RAIDZ device then make sure that the 2405243505Smm * the initial version is capable of supporting that feature. 
2406243505Smm */ 2407243505Smm switch (ztest_opts.zo_raidz_parity) { 2408243505Smm case 0: 2409243505Smm case 1: 2410243505Smm initial_version = SPA_VERSION_INITIAL; 2411243505Smm break; 2412243505Smm case 2: 2413243505Smm initial_version = SPA_VERSION_RAIDZ2; 2414243505Smm break; 2415243505Smm case 3: 2416243505Smm initial_version = SPA_VERSION_RAIDZ3; 2417243505Smm break; 2418243505Smm } 2419243505Smm 2420243505Smm /* 2421243505Smm * Create a pool with a spa version that can be upgraded. Pick 2422243505Smm * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. 2423243505Smm */ 2424243505Smm do { 2425243505Smm version = ztest_random_spa_version(initial_version); 2426243505Smm } while (version > SPA_VERSION_BEFORE_FEATURES); 2427243505Smm 2428243505Smm props = fnvlist_alloc(); 2429243505Smm fnvlist_add_uint64(props, 2430243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION), version); 2431248571Smm VERIFY0(spa_create(name, nvroot, props, NULL)); 2432243505Smm fnvlist_free(nvroot); 2433243505Smm fnvlist_free(props); 2434243505Smm 2435243505Smm VERIFY0(spa_open(name, &spa, FTAG)); 2436243505Smm VERIFY3U(spa_version(spa), ==, version); 2437243505Smm newversion = ztest_random_spa_version(version + 1); 2438243505Smm 2439243505Smm if (ztest_opts.zo_verbose >= 4) { 2440243505Smm (void) printf("upgrading spa version from %llu to %llu\n", 2441243505Smm (u_longlong_t)version, (u_longlong_t)newversion); 2442243505Smm } 2443243505Smm 2444243505Smm spa_upgrade(spa, newversion); 2445243505Smm VERIFY3U(spa_version(spa), >, version); 2446243505Smm VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, 2447243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION))); 2448243505Smm spa_close(spa, FTAG); 2449243505Smm 2450243505Smm strfree(name); 2451243505Smm VERIFY0(mutex_unlock(&ztest_vdev_lock)); 2452243505Smm} 2453243505Smm 2454185029Spjdstatic vdev_t * 2455185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 2456185029Spjd{ 2457185029Spjd vdev_t *mvd; 
2458185029Spjd 2459185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 2460185029Spjd return (vd); 2461185029Spjd 2462185029Spjd for (int c = 0; c < vd->vdev_children; c++) 2463185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 2464185029Spjd NULL) 2465185029Spjd return (mvd); 2466185029Spjd 2467185029Spjd return (NULL); 2468185029Spjd} 2469185029Spjd 2470168404Spjd/* 2471219089Spjd * Find the first available hole which can be used as a top-level. 2472219089Spjd */ 2473219089Spjdint 2474219089Spjdfind_vdev_hole(spa_t *spa) 2475219089Spjd{ 2476219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2477219089Spjd int c; 2478219089Spjd 2479219089Spjd ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); 2480219089Spjd 2481219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 2482219089Spjd vdev_t *cvd = rvd->vdev_child[c]; 2483219089Spjd 2484219089Spjd if (cvd->vdev_ishole) 2485219089Spjd break; 2486219089Spjd } 2487219089Spjd return (c); 2488219089Spjd} 2489219089Spjd 2490219089Spjd/* 2491168404Spjd * Verify that vdev_add() works as expected. 2492168404Spjd */ 2493219089Spjd/* ARGSUSED */ 2494168404Spjdvoid 2495219089Spjdztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) 2496168404Spjd{ 2497219089Spjd ztest_shared_t *zs = ztest_shared; 2498236143Smm spa_t *spa = ztest_spa; 2499219089Spjd uint64_t leaves; 2500219089Spjd uint64_t guid; 2501168404Spjd nvlist_t *nvroot; 2502168404Spjd int error; 2503168404Spjd 2504236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2505248571Smm leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; 2506168404Spjd 2507185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2508168404Spjd 2509219089Spjd ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; 2510168404Spjd 2511185029Spjd /* 2512219089Spjd * If we have slogs then remove them 1/4 of the time. 
2513185029Spjd */ 2514219089Spjd if (spa_has_slogs(spa) && ztest_random(4) == 0) { 2515219089Spjd /* 2516219089Spjd * Grab the guid from the head of the log class rotor. 2517219089Spjd */ 2518219089Spjd guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; 2519185029Spjd 2520219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2521168404Spjd 2522219089Spjd /* 2523219089Spjd * We have to grab the zs_name_lock as writer to 2524219089Spjd * prevent a race between removing a slog (dmu_objset_find) 2525219089Spjd * and destroying a dataset. Removing the slog will 2526219089Spjd * grab a reference on the dataset which may cause 2527219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 2528219089Spjd * leaving the dataset in an inconsistent state. 2529219089Spjd */ 2530236143Smm VERIFY(rw_wrlock(&ztest_name_lock) == 0); 2531219089Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2532236143Smm VERIFY(rw_unlock(&ztest_name_lock) == 0); 2533168404Spjd 2534219089Spjd if (error && error != EEXIST) 2535219089Spjd fatal(0, "spa_vdev_remove() = %d", error); 2536219089Spjd } else { 2537219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2538219089Spjd 2539219089Spjd /* 2540219089Spjd * Make 1/4 of the devices be log devices. 2541219089Spjd */ 2542243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, 2543236143Smm ztest_opts.zo_vdev_size, 0, 2544236143Smm ztest_random(4) == 0, ztest_opts.zo_raidz, 2545236143Smm zs->zs_mirrors, 1); 2546219089Spjd 2547219089Spjd error = spa_vdev_add(spa, nvroot); 2548219089Spjd nvlist_free(nvroot); 2549219089Spjd 2550219089Spjd if (error == ENOSPC) 2551219089Spjd ztest_record_enospc("spa_vdev_add"); 2552219089Spjd else if (error != 0) 2553219089Spjd fatal(0, "spa_vdev_add() = %d", error); 2554219089Spjd } 2555219089Spjd 2556236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2557168404Spjd} 2558168404Spjd 2559185029Spjd/* 2560185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
2561185029Spjd */ 2562219089Spjd/* ARGSUSED */ 2563185029Spjdvoid 2564219089Spjdztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) 2565168404Spjd{ 2566219089Spjd ztest_shared_t *zs = ztest_shared; 2567236143Smm spa_t *spa = ztest_spa; 2568185029Spjd vdev_t *rvd = spa->spa_root_vdev; 2569185029Spjd spa_aux_vdev_t *sav; 2570185029Spjd char *aux; 2571185029Spjd uint64_t guid = 0; 2572185029Spjd int error; 2573168404Spjd 2574185029Spjd if (ztest_random(2) == 0) { 2575185029Spjd sav = &spa->spa_spares; 2576185029Spjd aux = ZPOOL_CONFIG_SPARES; 2577185029Spjd } else { 2578185029Spjd sav = &spa->spa_l2cache; 2579185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 2580185029Spjd } 2581185029Spjd 2582236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2583185029Spjd 2584185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2585185029Spjd 2586185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 2587185029Spjd /* 2588185029Spjd * Pick a random device to remove. 2589185029Spjd */ 2590185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 2591185029Spjd } else { 2592185029Spjd /* 2593185029Spjd * Find an unused device we can add. 2594185029Spjd */ 2595219089Spjd zs->zs_vdev_aux = 0; 2596185029Spjd for (;;) { 2597185029Spjd char path[MAXPATHLEN]; 2598185029Spjd int c; 2599236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2600236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2601236143Smm zs->zs_vdev_aux); 2602185029Spjd for (c = 0; c < sav->sav_count; c++) 2603185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2604185029Spjd path) == 0) 2605185029Spjd break; 2606185029Spjd if (c == sav->sav_count && 2607185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2608185029Spjd break; 2609219089Spjd zs->zs_vdev_aux++; 2610168404Spjd } 2611168404Spjd } 2612168404Spjd 2613185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2614168404Spjd 2615185029Spjd if (guid == 0) { 2616185029Spjd /* 2617185029Spjd * Add a new device. 
2618185029Spjd */ 2619243505Smm nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, 2620236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2621185029Spjd error = spa_vdev_add(spa, nvroot); 2622185029Spjd if (error != 0) 2623185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2624185029Spjd nvlist_free(nvroot); 2625185029Spjd } else { 2626185029Spjd /* 2627185029Spjd * Remove an existing device. Sometimes, dirty its 2628185029Spjd * vdev state first to make sure we handle removal 2629185029Spjd * of devices that have pending state changes. 2630185029Spjd */ 2631185029Spjd if (ztest_random(2) == 0) 2632219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2633185029Spjd 2634185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2635185029Spjd if (error != 0 && error != EBUSY) 2636185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2637185029Spjd } 2638185029Spjd 2639236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2640168404Spjd} 2641168404Spjd 2642168404Spjd/* 2643219089Spjd * split a pool if it has mirror tlvdevs 2644219089Spjd */ 2645219089Spjd/* ARGSUSED */ 2646219089Spjdvoid 2647219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2648219089Spjd{ 2649219089Spjd ztest_shared_t *zs = ztest_shared; 2650236143Smm spa_t *spa = ztest_spa; 2651219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2652219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2653219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2654219089Spjd int error = 0; 2655219089Spjd 2656236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2657219089Spjd 2658219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2659236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2660236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2661219089Spjd return; 2662219089Spjd } 2663219089Spjd 2664219089Spjd /* clean up the old pool, if any */ 2665219089Spjd (void) spa_destroy("splitp"); 2666219089Spjd 2667219089Spjd 
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2668219089Spjd 2669219089Spjd /* generate a config from the existing config */ 2670219089Spjd mutex_enter(&spa->spa_props_lock); 2671219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2672219089Spjd &tree) == 0); 2673219089Spjd mutex_exit(&spa->spa_props_lock); 2674219089Spjd 2675219089Spjd VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2676219089Spjd &children) == 0); 2677219089Spjd 2678219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2679219089Spjd for (c = 0; c < children; c++) { 2680219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2681219089Spjd nvlist_t **mchild; 2682219089Spjd uint_t mchildren; 2683219089Spjd 2684219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2685219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2686219089Spjd 0) == 0); 2687219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2688219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2689219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2690219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2691219089Spjd if (lastlogid == 0) 2692219089Spjd lastlogid = schildren; 2693219089Spjd ++schildren; 2694219089Spjd continue; 2695219089Spjd } 2696219089Spjd lastlogid = 0; 2697219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2698219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2699219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2700219089Spjd } 2701219089Spjd 2702219089Spjd /* OK, create a config that can be used to split */ 2703219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2704219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2705219089Spjd VDEV_TYPE_ROOT) == 0); 2706219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2707219089Spjd lastlogid != 0 ? 
lastlogid : schildren) == 0); 2708219089Spjd 2709219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2710219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2711219089Spjd 2712219089Spjd for (c = 0; c < schildren; c++) 2713219089Spjd nvlist_free(schild[c]); 2714219089Spjd free(schild); 2715219089Spjd nvlist_free(split); 2716219089Spjd 2717219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2718219089Spjd 2719236143Smm (void) rw_wrlock(&ztest_name_lock); 2720219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2721236143Smm (void) rw_unlock(&ztest_name_lock); 2722219089Spjd 2723219089Spjd nvlist_free(config); 2724219089Spjd 2725219089Spjd if (error == 0) { 2726219089Spjd (void) printf("successful split - results:\n"); 2727219089Spjd mutex_enter(&spa_namespace_lock); 2728219089Spjd show_pool_stats(spa); 2729219089Spjd show_pool_stats(spa_lookup("splitp")); 2730219089Spjd mutex_exit(&spa_namespace_lock); 2731219089Spjd ++zs->zs_splits; 2732219089Spjd --zs->zs_mirrors; 2733219089Spjd } 2734236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2735219089Spjd 2736219089Spjd} 2737219089Spjd 2738219089Spjd/* 2739168404Spjd * Verify that we can attach and detach devices. 
2740168404Spjd */ 2741219089Spjd/* ARGSUSED */ 2742168404Spjdvoid 2743219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2744168404Spjd{ 2745219089Spjd ztest_shared_t *zs = ztest_shared; 2746236143Smm spa_t *spa = ztest_spa; 2747185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2748168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2749168404Spjd vdev_t *oldvd, *newvd, *pvd; 2750185029Spjd nvlist_t *root; 2751219089Spjd uint64_t leaves; 2752168404Spjd uint64_t leaf, top; 2753168404Spjd uint64_t ashift = ztest_get_ashift(); 2754209962Smm uint64_t oldguid, pguid; 2755254112Sdelphij uint64_t oldsize, newsize; 2756168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2757168404Spjd int replacing; 2758185029Spjd int oldvd_has_siblings = B_FALSE; 2759185029Spjd int newvd_is_spare = B_FALSE; 2760185029Spjd int oldvd_is_log; 2761168404Spjd int error, expected_error; 2762168404Spjd 2763236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2764236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2765168404Spjd 2766185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2767168404Spjd 2768168404Spjd /* 2769168404Spjd * Decide whether to do an attach or a replace. 2770168404Spjd */ 2771168404Spjd replacing = ztest_random(2); 2772168404Spjd 2773168404Spjd /* 2774168404Spjd * Pick a random top-level vdev. 2775168404Spjd */ 2776219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2777168404Spjd 2778168404Spjd /* 2779168404Spjd * Pick a random leaf within it. 2780168404Spjd */ 2781168404Spjd leaf = ztest_random(leaves); 2782168404Spjd 2783168404Spjd /* 2784185029Spjd * Locate this vdev. 
2785168404Spjd */ 2786185029Spjd oldvd = rvd->vdev_child[top]; 2787219089Spjd if (zs->zs_mirrors >= 1) { 2788209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2789219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2790236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2791209962Smm } 2792236143Smm if (ztest_opts.zo_raidz > 1) { 2793209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2794236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2795236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2796209962Smm } 2797168404Spjd 2798168404Spjd /* 2799185029Spjd * If we're already doing an attach or replace, oldvd may be a 2800185029Spjd * mirror vdev -- in which case, pick a random child. 2801168404Spjd */ 2802185029Spjd while (oldvd->vdev_children != 0) { 2803185029Spjd oldvd_has_siblings = B_TRUE; 2804209962Smm ASSERT(oldvd->vdev_children >= 2); 2805209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2806185029Spjd } 2807168404Spjd 2808185029Spjd oldguid = oldvd->vdev_guid; 2809219089Spjd oldsize = vdev_get_min_asize(oldvd); 2810185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2811185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2812185029Spjd pvd = oldvd->vdev_parent; 2813209962Smm pguid = pvd->vdev_guid; 2814185029Spjd 2815168404Spjd /* 2816185029Spjd * If oldvd has siblings, then half of the time, detach it. 
2817168404Spjd */ 2818185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2819185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2820209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2821209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2822209962Smm error != ENOTSUP) 2823209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2824236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2825185029Spjd return; 2826185029Spjd } 2827168404Spjd 2828168404Spjd /* 2829185029Spjd * For the new vdev, choose with equal probability between the two 2830185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2831168404Spjd */ 2832185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2833185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2834185029Spjd newvd_is_spare = B_TRUE; 2835185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2836185029Spjd } else { 2837185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2838236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2839236143Smm top * leaves + leaf); 2840185029Spjd if (ztest_random(2) == 0) 2841185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2842185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2843185029Spjd } 2844168404Spjd 2845185029Spjd if (newvd) { 2846219089Spjd newsize = vdev_get_min_asize(newvd); 2847185029Spjd } else { 2848185029Spjd /* 2849185029Spjd * Make newsize a little bigger or smaller than oldsize. 2850185029Spjd * If it's smaller, the attach should fail. 2851185029Spjd * If it's larger, and we're doing a replace, 2852185029Spjd * we should get dynamic LUN growth when we're done. 2853185029Spjd */ 2854185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2855185029Spjd } 2856185029Spjd 2857168404Spjd /* 2858168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2859168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 
2860168404Spjd * 2861168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2862168404Spjd * 2863168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2864168404Spjd */ 2865185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2866185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2867185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 2868185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 2869185029Spjd expected_error = ENOTSUP; 2870185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 2871185029Spjd expected_error = ENOTSUP; 2872185029Spjd else if (newvd == oldvd) 2873185029Spjd expected_error = replacing ? 0 : EBUSY; 2874185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 2875168404Spjd expected_error = EBUSY; 2876168404Spjd else if (newsize < oldsize) 2877168404Spjd expected_error = EOVERFLOW; 2878168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 2879168404Spjd expected_error = EDOM; 2880168404Spjd else 2881168404Spjd expected_error = 0; 2882168404Spjd 2883185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2884168404Spjd 2885168404Spjd /* 2886168404Spjd * Build the nvlist describing newpath. 2887168404Spjd */ 2888243505Smm root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, 2889185029Spjd ashift, 0, 0, 0, 1); 2890168404Spjd 2891185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 2892168404Spjd 2893168404Spjd nvlist_free(root); 2894168404Spjd 2895168404Spjd /* 2896168404Spjd * If our parent was the replacing vdev, but the replace completed, 2897168404Spjd * then instead of failing with ENOTSUP we may either succeed, 2898168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 2899168404Spjd */ 2900168404Spjd if (expected_error == ENOTSUP && 2901168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 2902168404Spjd expected_error = error; 2903168404Spjd 2904168404Spjd /* 2905168404Spjd * If someone grew the LUN, the replacement may be too small. 
2906168404Spjd */ 2907185029Spjd if (error == EOVERFLOW || error == EBUSY) 2908168404Spjd expected_error = error; 2909168404Spjd 2910185029Spjd /* XXX workaround 6690467 */ 2911185029Spjd if (error != expected_error && expected_error != EBUSY) { 2912185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 2913185029Spjd "returned %d, expected %d", 2914254112Sdelphij oldpath, oldsize, newpath, 2915254112Sdelphij newsize, replacing, error, expected_error); 2916168404Spjd } 2917168404Spjd 2918236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2919168404Spjd} 2920168404Spjd 2921168404Spjd/* 2922219089Spjd * Callback function which expands the physical size of the vdev. 2923168404Spjd */ 2924219089Spjdvdev_t * 2925219089Spjdgrow_vdev(vdev_t *vd, void *arg) 2926168404Spjd{ 2927219089Spjd spa_t *spa = vd->vdev_spa; 2928219089Spjd size_t *newsize = arg; 2929168404Spjd size_t fsize; 2930168404Spjd int fd; 2931168404Spjd 2932219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2933219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2934168404Spjd 2935219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 2936219089Spjd return (vd); 2937219089Spjd 2938219089Spjd fsize = lseek(fd, 0, SEEK_END); 2939219089Spjd (void) ftruncate(fd, *newsize); 2940219089Spjd 2941236143Smm if (ztest_opts.zo_verbose >= 6) { 2942219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 2943219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 2944219089Spjd } 2945219089Spjd (void) close(fd); 2946219089Spjd return (NULL); 2947219089Spjd} 2948219089Spjd 2949219089Spjd/* 2950219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 
2951219089Spjd */ 2952219089Spjd/* ARGSUSED */ 2953219089Spjdvdev_t * 2954219089Spjdonline_vdev(vdev_t *vd, void *arg) 2955219089Spjd{ 2956219089Spjd spa_t *spa = vd->vdev_spa; 2957219089Spjd vdev_t *tvd = vd->vdev_top; 2958219089Spjd uint64_t guid = vd->vdev_guid; 2959219089Spjd uint64_t generation = spa->spa_config_generation + 1; 2960219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 2961219089Spjd int error; 2962219089Spjd 2963219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2964219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2965219089Spjd 2966219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 2967219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2968219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 2969219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2970219089Spjd 2971168404Spjd /* 2972219089Spjd * If vdev_online returned an error or the underlying vdev_open 2973219089Spjd * failed then we abort the expand. The only way to know that 2974219089Spjd * vdev_open fails is by checking the returned newstate. 2975168404Spjd */ 2976219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 2977236143Smm if (ztest_opts.zo_verbose >= 5) { 2978219089Spjd (void) printf("Unable to expand vdev, state %llu, " 2979219089Spjd "error %d\n", (u_longlong_t)newstate, error); 2980219089Spjd } 2981219089Spjd return (vd); 2982219089Spjd } 2983219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 2984168404Spjd 2985219089Spjd /* 2986219089Spjd * Since we dropped the lock we need to ensure that we're 2987219089Spjd * still talking to the original vdev. It's possible this 2988219089Spjd * vdev may have been detached/replaced while we were 2989219089Spjd * trying to online it. 
2990219089Spjd */ 2991219089Spjd if (generation != spa->spa_config_generation) { 2992236143Smm if (ztest_opts.zo_verbose >= 5) { 2993219089Spjd (void) printf("vdev configuration has changed, " 2994219089Spjd "guid %llu, state %llu, expected gen %llu, " 2995219089Spjd "got gen %llu\n", 2996219089Spjd (u_longlong_t)guid, 2997219089Spjd (u_longlong_t)tvd->vdev_state, 2998219089Spjd (u_longlong_t)generation, 2999219089Spjd (u_longlong_t)spa->spa_config_generation); 3000219089Spjd } 3001219089Spjd return (vd); 3002219089Spjd } 3003219089Spjd return (NULL); 3004219089Spjd} 3005168404Spjd 3006219089Spjd/* 3007219089Spjd * Traverse the vdev tree calling the supplied function. 3008219089Spjd * We continue to walk the tree until we either have walked all 3009219089Spjd * children or we receive a non-NULL return from the callback. 3010219089Spjd * If a NULL callback is passed, then we just return back the first 3011219089Spjd * leaf vdev we encounter. 3012219089Spjd */ 3013219089Spjdvdev_t * 3014219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 3015219089Spjd{ 3016219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 3017219089Spjd if (func == NULL) 3018219089Spjd return (vd); 3019219089Spjd else 3020219089Spjd return (func(vd, arg)); 3021219089Spjd } 3022168404Spjd 3023219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 3024219089Spjd vdev_t *cvd = vd->vdev_child[c]; 3025219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 3026219089Spjd return (cvd); 3027219089Spjd } 3028219089Spjd return (NULL); 3029219089Spjd} 3030219089Spjd 3031219089Spjd/* 3032219089Spjd * Verify that dynamic LUN growth works as expected. 
3033219089Spjd */ 3034219089Spjd/* ARGSUSED */ 3035219089Spjdvoid 3036219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 3037219089Spjd{ 3038236143Smm spa_t *spa = ztest_spa; 3039219089Spjd vdev_t *vd, *tvd; 3040219089Spjd metaslab_class_t *mc; 3041219089Spjd metaslab_group_t *mg; 3042219089Spjd size_t psize, newsize; 3043219089Spjd uint64_t top; 3044219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 3045219089Spjd 3046236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 3047219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3048219089Spjd 3049219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 3050219089Spjd 3051219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3052219089Spjd mg = tvd->vdev_mg; 3053219089Spjd mc = mg->mg_class; 3054219089Spjd old_ms_count = tvd->vdev_ms_count; 3055219089Spjd old_class_space = metaslab_class_get_space(mc); 3056219089Spjd 3057219089Spjd /* 3058219089Spjd * Determine the size of the first leaf vdev associated with 3059219089Spjd * our top-level device. 3060219089Spjd */ 3061219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 3062219089Spjd ASSERT3P(vd, !=, NULL); 3063219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3064219089Spjd 3065219089Spjd psize = vd->vdev_psize; 3066219089Spjd 3067219089Spjd /* 3068219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 3069219089Spjd * original size, and it has a valid psize. 
3070219089Spjd */ 3071219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 3072236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 3073219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3074236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3075219089Spjd return; 3076219089Spjd } 3077219089Spjd ASSERT(psize > 0); 3078219089Spjd newsize = psize + psize / 8; 3079219089Spjd ASSERT3U(newsize, >, psize); 3080219089Spjd 3081236143Smm if (ztest_opts.zo_verbose >= 6) { 3082219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 3083219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 3084219089Spjd } 3085219089Spjd 3086219089Spjd /* 3087219089Spjd * Growing the vdev is a two step process: 3088219089Spjd * 1). expand the physical size (i.e. relabel) 3089219089Spjd * 2). online the vdev to create the new metaslabs 3090219089Spjd */ 3091219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 3092219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 3093219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 3094236143Smm if (ztest_opts.zo_verbose >= 5) { 3095219089Spjd (void) printf("Could not expand LUN because " 3096219089Spjd "the vdev configuration changed.\n"); 3097168404Spjd } 3098219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3099236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3100219089Spjd return; 3101168404Spjd } 3102168404Spjd 3103219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3104219089Spjd 3105219089Spjd /* 3106219089Spjd * Expanding the LUN will update the config asynchronously, 3107219089Spjd * thus we must wait for the async thread to complete any 3108219089Spjd * pending tasks before proceeding. 
3109219089Spjd */ 3110219089Spjd for (;;) { 3111219089Spjd boolean_t done; 3112219089Spjd mutex_enter(&spa->spa_async_lock); 3113219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 3114219089Spjd mutex_exit(&spa->spa_async_lock); 3115219089Spjd if (done) 3116219089Spjd break; 3117219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3118219089Spjd (void) poll(NULL, 0, 100); 3119219089Spjd } 3120219089Spjd 3121219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3122219089Spjd 3123219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3124219089Spjd new_ms_count = tvd->vdev_ms_count; 3125219089Spjd new_class_space = metaslab_class_get_space(mc); 3126219089Spjd 3127219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 3128236143Smm if (ztest_opts.zo_verbose >= 5) { 3129219089Spjd (void) printf("Could not verify LUN expansion due to " 3130219089Spjd "intervening vdev offline or remove.\n"); 3131219089Spjd } 3132219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3133236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3134219089Spjd return; 3135219089Spjd } 3136219089Spjd 3137219089Spjd /* 3138219089Spjd * Make sure we were able to grow the vdev. 3139219089Spjd */ 3140219089Spjd if (new_ms_count <= old_ms_count) 3141219089Spjd fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", 3142219089Spjd old_ms_count, new_ms_count); 3143219089Spjd 3144219089Spjd /* 3145219089Spjd * Make sure we were able to grow the pool. 
3146219089Spjd */ 3147219089Spjd if (new_class_space <= old_class_space) 3148219089Spjd fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", 3149219089Spjd old_class_space, new_class_space); 3150219089Spjd 3151236143Smm if (ztest_opts.zo_verbose >= 5) { 3152219089Spjd char oldnumbuf[6], newnumbuf[6]; 3153219089Spjd 3154219089Spjd nicenum(old_class_space, oldnumbuf); 3155219089Spjd nicenum(new_class_space, newnumbuf); 3156219089Spjd (void) printf("%s grew from %s to %s\n", 3157219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 3158219089Spjd } 3159219089Spjd 3160219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3161236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3162168404Spjd} 3163168404Spjd 3164219089Spjd/* 3165219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 3166219089Spjd */ 3167168404Spjd/* ARGSUSED */ 3168168404Spjdstatic void 3169219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3170168404Spjd{ 3171168404Spjd /* 3172219089Spjd * Create the objects common to all ztest datasets. 
3173168404Spjd */ 3174219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3175168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3176219089Spjd} 3177168404Spjd 3178219089Spjdstatic int 3179219089Spjdztest_dataset_create(char *dsname) 3180219089Spjd{ 3181219089Spjd uint64_t zilset = ztest_random(100); 3182219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3183219089Spjd ztest_objset_create_cb, NULL); 3184219089Spjd 3185219089Spjd if (err || zilset < 80) 3186219089Spjd return (err); 3187219089Spjd 3188236143Smm if (ztest_opts.zo_verbose >= 6) 3189236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3190219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3191219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3192168404Spjd} 3193168404Spjd 3194219089Spjd/* ARGSUSED */ 3195168404Spjdstatic int 3196219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3197168404Spjd{ 3198168404Spjd objset_t *os; 3199219089Spjd dmu_object_info_t doi; 3200168404Spjd int error; 3201168404Spjd 3202168404Spjd /* 3203168404Spjd * Verify that the dataset contains a directory object. 3204168404Spjd */ 3205248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); 3206219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3207168404Spjd if (error != ENOENT) { 3208168404Spjd /* We could have crashed in the middle of destroying it */ 3209240415Smm ASSERT0(error); 3210219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3211219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3212168404Spjd } 3213248571Smm dmu_objset_disown(os, FTAG); 3214168404Spjd 3215168404Spjd /* 3216168404Spjd * Destroy the dataset. 
3217168404Spjd */ 3218248571Smm if (strchr(name, '@') != NULL) { 3219248571Smm VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); 3220248571Smm } else { 3221248571Smm VERIFY0(dsl_destroy_head(name)); 3222248571Smm } 3223168404Spjd return (0); 3224168404Spjd} 3225168404Spjd 3226219089Spjdstatic boolean_t 3227219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3228168404Spjd{ 3229219089Spjd char snapname[MAXNAMELEN]; 3230219089Spjd int error; 3231168404Spjd 3232248571Smm (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); 3233168404Spjd 3234248571Smm error = dmu_objset_snapshot_one(osname, snapname); 3235219089Spjd if (error == ENOSPC) { 3236219089Spjd ztest_record_enospc(FTAG); 3237219089Spjd return (B_FALSE); 3238219089Spjd } 3239248571Smm if (error != 0 && error != EEXIST) { 3240248571Smm fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, 3241248571Smm snapname, error); 3242248571Smm } 3243219089Spjd return (B_TRUE); 3244219089Spjd} 3245168404Spjd 3246219089Spjdstatic boolean_t 3247219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3248219089Spjd{ 3249219089Spjd char snapname[MAXNAMELEN]; 3250219089Spjd int error; 3251219089Spjd 3252219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3253219089Spjd (u_longlong_t)id); 3254219089Spjd 3255248571Smm error = dsl_destroy_snapshot(snapname, B_FALSE); 3256219089Spjd if (error != 0 && error != ENOENT) 3257219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3258219089Spjd return (B_TRUE); 3259168404Spjd} 3260168404Spjd 3261219089Spjd/* ARGSUSED */ 3262168404Spjdvoid 3263219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3264168404Spjd{ 3265219089Spjd ztest_ds_t zdtmp; 3266219089Spjd int iters; 3267168404Spjd int error; 3268185029Spjd objset_t *os, *os2; 3269219089Spjd char name[MAXNAMELEN]; 3270168404Spjd zilog_t *zilog; 3271168404Spjd 3272236143Smm (void) rw_rdlock(&ztest_name_lock); 3273168404Spjd 3274219089Spjd (void) 
snprintf(name, MAXNAMELEN, "%s/temp_%llu", 3275236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 3276168404Spjd 3277168404Spjd /* 3278168404Spjd * If this dataset exists from a previous run, process its replay log 3279168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3280219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3281168404Spjd */ 3282168404Spjd if (ztest_random(2) == 0 && 3283219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3284236143Smm ztest_zd_init(&zdtmp, NULL, os); 3285219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3286219089Spjd ztest_zd_fini(&zdtmp); 3287219089Spjd dmu_objset_disown(os, FTAG); 3288168404Spjd } 3289168404Spjd 3290168404Spjd /* 3291168404Spjd * There may be an old instance of the dataset we're about to 3292168404Spjd * create lying around from a previous run. If so, destroy it 3293168404Spjd * and all of its snapshots. 3294168404Spjd */ 3295219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3296168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3297168404Spjd 3298168404Spjd /* 3299168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3300168404Spjd */ 3301248571Smm VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, 3302248571Smm FTAG, &os)); 3303168404Spjd 3304168404Spjd /* 3305168404Spjd * Verify that we can create a new dataset. 
3306168404Spjd */ 3307219089Spjd error = ztest_dataset_create(name); 3308168404Spjd if (error) { 3309168404Spjd if (error == ENOSPC) { 3310219089Spjd ztest_record_enospc(FTAG); 3311236143Smm (void) rw_unlock(&ztest_name_lock); 3312168404Spjd return; 3313168404Spjd } 3314168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3315168404Spjd } 3316168404Spjd 3317248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3318168404Spjd 3319236143Smm ztest_zd_init(&zdtmp, NULL, os); 3320219089Spjd 3321168404Spjd /* 3322168404Spjd * Open the intent log for it. 3323168404Spjd */ 3324219089Spjd zilog = zil_open(os, ztest_get_data); 3325168404Spjd 3326168404Spjd /* 3327219089Spjd * Put some objects in there, do a little I/O to them, 3328219089Spjd * and randomly take a couple of snapshots along the way. 3329168404Spjd */ 3330219089Spjd iters = ztest_random(5); 3331219089Spjd for (int i = 0; i < iters; i++) { 3332219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3333219089Spjd if (ztest_random(iters) == 0) 3334219089Spjd (void) ztest_snapshot_create(name, i); 3335168404Spjd } 3336168404Spjd 3337168404Spjd /* 3338168404Spjd * Verify that we cannot create an existing dataset. 3339168404Spjd */ 3340219089Spjd VERIFY3U(EEXIST, ==, 3341219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3342168404Spjd 3343168404Spjd /* 3344219089Spjd * Verify that we can hold an objset that is also owned. 3345168404Spjd */ 3346219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3347219089Spjd dmu_objset_rele(os2, FTAG); 3348168404Spjd 3349219089Spjd /* 3350219089Spjd * Verify that we cannot own an objset that is already owned. 
3351219089Spjd */ 3352219089Spjd VERIFY3U(EBUSY, ==, 3353219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3354219089Spjd 3355168404Spjd zil_close(zilog); 3356219089Spjd dmu_objset_disown(os, FTAG); 3357219089Spjd ztest_zd_fini(&zdtmp); 3358168404Spjd 3359236143Smm (void) rw_unlock(&ztest_name_lock); 3360168404Spjd} 3361168404Spjd 3362168404Spjd/* 3363168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 3364168404Spjd */ 3365168404Spjdvoid 3366219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3367168404Spjd{ 3368236143Smm (void) rw_rdlock(&ztest_name_lock); 3369219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3370219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3371236143Smm (void) rw_unlock(&ztest_name_lock); 3372219089Spjd} 3373219089Spjd 3374219089Spjd/* 3375219089Spjd * Cleanup non-standard snapshots and clones. 3376219089Spjd */ 3377219089Spjdvoid 3378219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3379219089Spjd{ 3380219089Spjd char snap1name[MAXNAMELEN]; 3381219089Spjd char clone1name[MAXNAMELEN]; 3382219089Spjd char snap2name[MAXNAMELEN]; 3383219089Spjd char clone2name[MAXNAMELEN]; 3384219089Spjd char snap3name[MAXNAMELEN]; 3385168404Spjd int error; 3386168404Spjd 3387219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3388219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3389219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3390219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3391219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3392168404Spjd 3393248571Smm error = dsl_destroy_head(clone2name); 3394219089Spjd if (error && error != ENOENT) 3395248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); 3396248571Smm error = dsl_destroy_snapshot(snap3name, B_FALSE); 3397219089Spjd if (error && error != 
ENOENT) 3398248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); 3399248571Smm error = dsl_destroy_snapshot(snap2name, B_FALSE); 3400219089Spjd if (error && error != ENOENT) 3401248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); 3402248571Smm error = dsl_destroy_head(clone1name); 3403219089Spjd if (error && error != ENOENT) 3404248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); 3405248571Smm error = dsl_destroy_snapshot(snap1name, B_FALSE); 3406219089Spjd if (error && error != ENOENT) 3407248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); 3408168404Spjd} 3409168404Spjd 3410168404Spjd/* 3411207910Smm * Verify dsl_dataset_promote handles EBUSY 3412207910Smm */ 3413207910Smmvoid 3414219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3415207910Smm{ 3416248571Smm objset_t *os; 3417219089Spjd char snap1name[MAXNAMELEN]; 3418219089Spjd char clone1name[MAXNAMELEN]; 3419219089Spjd char snap2name[MAXNAMELEN]; 3420219089Spjd char clone2name[MAXNAMELEN]; 3421219089Spjd char snap3name[MAXNAMELEN]; 3422219089Spjd char *osname = zd->zd_name; 3423219089Spjd int error; 3424207910Smm 3425236143Smm (void) rw_rdlock(&ztest_name_lock); 3426207910Smm 3427219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3428207910Smm 3429219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3430219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3431219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3432219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3433219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3434207910Smm 3435248571Smm error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); 3436209962Smm if (error && error != EEXIST) { 3437209962Smm if (error == ENOSPC) { 3438209962Smm ztest_record_enospc(FTAG); 3439209962Smm goto out; 3440209962Smm } 3441209962Smm 
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3442209962Smm } 3443207910Smm 3444248571Smm error = dmu_objset_clone(clone1name, snap1name); 3445209962Smm if (error) { 3446209962Smm if (error == ENOSPC) { 3447209962Smm ztest_record_enospc(FTAG); 3448209962Smm goto out; 3449209962Smm } 3450207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3451209962Smm } 3452207910Smm 3453248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); 3454209962Smm if (error && error != EEXIST) { 3455209962Smm if (error == ENOSPC) { 3456209962Smm ztest_record_enospc(FTAG); 3457209962Smm goto out; 3458209962Smm } 3459209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3460209962Smm } 3461207910Smm 3462248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); 3463209962Smm if (error && error != EEXIST) { 3464209962Smm if (error == ENOSPC) { 3465209962Smm ztest_record_enospc(FTAG); 3466209962Smm goto out; 3467209962Smm } 3468209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3469209962Smm } 3470207910Smm 3471248571Smm error = dmu_objset_clone(clone2name, snap3name); 3472209962Smm if (error) { 3473209962Smm if (error == ENOSPC) { 3474219089Spjd ztest_record_enospc(FTAG); 3475209962Smm goto out; 3476209962Smm } 3477207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3478209962Smm } 3479207910Smm 3480248571Smm error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); 3481207910Smm if (error) 3482248571Smm fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); 3483219089Spjd error = dsl_dataset_promote(clone2name, NULL); 3484268075Sdelphij if (error == ENOSPC) { 3485268075Sdelphij dmu_objset_disown(os, FTAG); 3486268075Sdelphij ztest_record_enospc(FTAG); 3487268075Sdelphij goto out; 3488268075Sdelphij } 3489207910Smm if (error != EBUSY) 3490207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3491207910Smm error); 3492248571Smm 
dmu_objset_disown(os, FTAG); 3493207910Smm 3494209962Smmout: 3495219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3496207910Smm 3497236143Smm (void) rw_unlock(&ztest_name_lock); 3498207910Smm} 3499207910Smm 3500207910Smm/* 3501168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3502168404Spjd */ 3503168404Spjdvoid 3504219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3505168404Spjd{ 3506219089Spjd ztest_od_t od[4]; 3507219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3508168404Spjd 3509219089Spjd for (int b = 0; b < batchsize; b++) 3510219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3511168404Spjd 3512168404Spjd /* 3513219089Spjd * Destroy the previous batch of objects, create a new batch, 3514219089Spjd * and do some I/O on the new objects. 3515168404Spjd */ 3516219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3517219089Spjd return; 3518168404Spjd 3519219089Spjd while (ztest_random(4 * batchsize) != 0) 3520219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3521219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3522168404Spjd} 3523168404Spjd 3524168404Spjd/* 3525168404Spjd * Verify that dmu_{read,write} work as expected. 
3526168404Spjd */ 3527168404Spjdvoid 3528219089Spjdztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) 3529168404Spjd{ 3530219089Spjd objset_t *os = zd->zd_os; 3531219089Spjd ztest_od_t od[2]; 3532168404Spjd dmu_tx_t *tx; 3533168404Spjd int i, freeit, error; 3534168404Spjd uint64_t n, s, txg; 3535168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 3536219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3537219089Spjd uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); 3538168404Spjd uint64_t regions = 997; 3539168404Spjd uint64_t stride = 123456789ULL; 3540168404Spjd uint64_t width = 40; 3541168404Spjd int free_percent = 5; 3542168404Spjd 3543168404Spjd /* 3544168404Spjd * This test uses two objects, packobj and bigobj, that are always 3545168404Spjd * updated together (i.e. in the same tx) so that their contents are 3546168404Spjd * in sync and can be compared. Their contents relate to each other 3547168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 3548168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 3549168404Spjd * for any index n, there are three bufwads that should be identical: 3550168404Spjd * 3551168404Spjd * packobj, at offset n * sizeof (bufwad_t) 3552168404Spjd * bigobj, at the head of the nth chunk 3553168404Spjd * bigobj, at the tail of the nth chunk 3554168404Spjd * 3555168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 3556168404Spjd * and it doesn't have any relation to the object blocksize. 3557168404Spjd * The only requirement is that it can hold at least two bufwads. 3558168404Spjd * 3559168404Spjd * Normally, we write the bufwad to each of these locations. 3560168404Spjd * However, free_percent of the time we instead write zeroes to 3561168404Spjd * packobj and perform a dmu_free_range() on bigobj. 
By comparing 3562168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 3563168404Spjd * tracking which parts of an object are allocated and free, 3564168404Spjd * and that the contents of the allocated blocks are correct. 3565168404Spjd */ 3566168404Spjd 3567168404Spjd /* 3568168404Spjd * Read the directory info. If it's the first time, set things up. 3569168404Spjd */ 3570219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); 3571219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3572168404Spjd 3573219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3574219089Spjd return; 3575168404Spjd 3576219089Spjd bigobj = od[0].od_object; 3577219089Spjd packobj = od[1].od_object; 3578219089Spjd chunksize = od[0].od_gen; 3579219089Spjd ASSERT(chunksize == od[1].od_gen); 3580168404Spjd 3581168404Spjd /* 3582168404Spjd * Prefetch a random chunk of the big object. 3583168404Spjd * Our aim here is to get some async reads in flight 3584168404Spjd * for blocks that we may free below; the DMU should 3585168404Spjd * handle this race correctly. 3586168404Spjd */ 3587168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3588168404Spjd s = 1 + ztest_random(2 * width - 1); 3589286705Smav dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize, 3590286705Smav ZIO_PRIORITY_SYNC_READ); 3591168404Spjd 3592168404Spjd /* 3593168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 
3594168404Spjd */ 3595168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3596168404Spjd s = 1 + ztest_random(width - 1); 3597168404Spjd 3598168404Spjd packoff = n * sizeof (bufwad_t); 3599168404Spjd packsize = s * sizeof (bufwad_t); 3600168404Spjd 3601219089Spjd bigoff = n * chunksize; 3602219089Spjd bigsize = s * chunksize; 3603168404Spjd 3604168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 3605168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 3606168404Spjd 3607168404Spjd /* 3608168404Spjd * free_percent of the time, free a range of bigobj rather than 3609168404Spjd * overwriting it. 3610168404Spjd */ 3611168404Spjd freeit = (ztest_random(100) < free_percent); 3612168404Spjd 3613168404Spjd /* 3614168404Spjd * Read the current contents of our objects. 3615168404Spjd */ 3616219089Spjd error = dmu_read(os, packobj, packoff, packsize, packbuf, 3617209962Smm DMU_READ_PREFETCH); 3618240415Smm ASSERT0(error); 3619219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, 3620209962Smm DMU_READ_PREFETCH); 3621240415Smm ASSERT0(error); 3622168404Spjd 3623168404Spjd /* 3624168404Spjd * Get a tx for the mods to both packobj and bigobj. 3625168404Spjd */ 3626168404Spjd tx = dmu_tx_create(os); 3627168404Spjd 3628219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3629168404Spjd 3630168404Spjd if (freeit) 3631219089Spjd dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); 3632168404Spjd else 3633219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3634168404Spjd 3635254077Sdelphij /* This accounts for setting the checksum/compression. 
*/ 3636254077Sdelphij dmu_tx_hold_bonus(tx, bigobj); 3637254077Sdelphij 3638219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3639219089Spjd if (txg == 0) { 3640168404Spjd umem_free(packbuf, packsize); 3641168404Spjd umem_free(bigbuf, bigsize); 3642168404Spjd return; 3643168404Spjd } 3644168404Spjd 3645268075Sdelphij enum zio_checksum cksum; 3646268075Sdelphij do { 3647268075Sdelphij cksum = (enum zio_checksum) 3648268075Sdelphij ztest_random_dsl_prop(ZFS_PROP_CHECKSUM); 3649268075Sdelphij } while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS); 3650268075Sdelphij dmu_object_set_checksum(os, bigobj, cksum, tx); 3651168404Spjd 3652268075Sdelphij enum zio_compress comp; 3653268075Sdelphij do { 3654268075Sdelphij comp = (enum zio_compress) 3655268075Sdelphij ztest_random_dsl_prop(ZFS_PROP_COMPRESSION); 3656268075Sdelphij } while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS); 3657268075Sdelphij dmu_object_set_compress(os, bigobj, comp, tx); 3658219089Spjd 3659168404Spjd /* 3660168404Spjd * For each index from n to n + s, verify that the existing bufwad 3661168404Spjd * in packobj matches the bufwads at the head and tail of the 3662168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 3663168404Spjd * with the new values we want to write out. 
3664168404Spjd */ 3665168404Spjd for (i = 0; i < s; i++) { 3666168404Spjd /* LINTED */ 3667168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3668168404Spjd /* LINTED */ 3669219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3670168404Spjd /* LINTED */ 3671219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3672168404Spjd 3673168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3674168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3675168404Spjd 3676168404Spjd if (pack->bw_txg > txg) 3677168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 3678168404Spjd pack->bw_txg, txg); 3679168404Spjd 3680168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 3681168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3682168404Spjd pack->bw_index, n, i); 3683168404Spjd 3684168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3685168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3686168404Spjd 3687168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3688168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3689168404Spjd 3690168404Spjd if (freeit) { 3691168404Spjd bzero(pack, sizeof (bufwad_t)); 3692168404Spjd } else { 3693168404Spjd pack->bw_index = n + i; 3694168404Spjd pack->bw_txg = txg; 3695168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 3696168404Spjd } 3697168404Spjd *bigH = *pack; 3698168404Spjd *bigT = *pack; 3699168404Spjd } 3700168404Spjd 3701168404Spjd /* 3702168404Spjd * We've verified all the old bufwads, and made new ones. 3703168404Spjd * Now write them out. 
3704168404Spjd */ 3705219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3706168404Spjd 3707168404Spjd if (freeit) { 3708236143Smm if (ztest_opts.zo_verbose >= 7) { 3709168404Spjd (void) printf("freeing offset %llx size %llx" 3710168404Spjd " txg %llx\n", 3711168404Spjd (u_longlong_t)bigoff, 3712168404Spjd (u_longlong_t)bigsize, 3713168404Spjd (u_longlong_t)txg); 3714168404Spjd } 3715219089Spjd VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); 3716168404Spjd } else { 3717236143Smm if (ztest_opts.zo_verbose >= 7) { 3718168404Spjd (void) printf("writing offset %llx size %llx" 3719168404Spjd " txg %llx\n", 3720168404Spjd (u_longlong_t)bigoff, 3721168404Spjd (u_longlong_t)bigsize, 3722168404Spjd (u_longlong_t)txg); 3723168404Spjd } 3724219089Spjd dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); 3725168404Spjd } 3726168404Spjd 3727168404Spjd dmu_tx_commit(tx); 3728168404Spjd 3729168404Spjd /* 3730168404Spjd * Sanity check the stuff we just wrote. 3731168404Spjd */ 3732168404Spjd { 3733168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3734168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3735168404Spjd 3736219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3737209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3738219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3739209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3740168404Spjd 3741168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3742168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3743168404Spjd 3744168404Spjd umem_free(packcheck, packsize); 3745168404Spjd umem_free(bigcheck, bigsize); 3746168404Spjd } 3747168404Spjd 3748168404Spjd umem_free(packbuf, packsize); 3749168404Spjd umem_free(bigbuf, bigsize); 3750168404Spjd} 3751168404Spjd 3752168404Spjdvoid 3753209962Smmcompare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, 3754219089Spjd uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) 3755209962Smm{ 
3756209962Smm uint64_t i; 3757209962Smm bufwad_t *pack; 3758209962Smm bufwad_t *bigH; 3759209962Smm bufwad_t *bigT; 3760209962Smm 3761209962Smm /* 3762209962Smm * For each index from n to n + s, verify that the existing bufwad 3763209962Smm * in packobj matches the bufwads at the head and tail of the 3764209962Smm * corresponding chunk in bigobj. Then update all three bufwads 3765209962Smm * with the new values we want to write out. 3766209962Smm */ 3767209962Smm for (i = 0; i < s; i++) { 3768209962Smm /* LINTED */ 3769209962Smm pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3770209962Smm /* LINTED */ 3771219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3772209962Smm /* LINTED */ 3773219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3774209962Smm 3775209962Smm ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3776209962Smm ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3777209962Smm 3778209962Smm if (pack->bw_txg > txg) 3779209962Smm fatal(0, "future leak: got %llx, open txg is %llx", 3780209962Smm pack->bw_txg, txg); 3781209962Smm 3782209962Smm if (pack->bw_data != 0 && pack->bw_index != n + i) 3783209962Smm fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3784209962Smm pack->bw_index, n, i); 3785209962Smm 3786209962Smm if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3787209962Smm fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3788209962Smm 3789209962Smm if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3790209962Smm fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3791209962Smm 3792209962Smm pack->bw_index = n + i; 3793209962Smm pack->bw_txg = txg; 3794209962Smm pack->bw_data = 1 + ztest_random(-2ULL); 3795209962Smm 3796209962Smm *bigH = *pack; 3797209962Smm *bigT = *pack; 3798209962Smm } 3799209962Smm} 3800209962Smm 3801209962Smmvoid 3802219089Spjdztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) 3803209962Smm{ 3804219089Spjd objset_t *os = zd->zd_os; 3805219089Spjd ztest_od_t od[2]; 
3806209962Smm dmu_tx_t *tx; 3807209962Smm uint64_t i; 3808209962Smm int error; 3809209962Smm uint64_t n, s, txg; 3810209962Smm bufwad_t *packbuf, *bigbuf; 3811219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3812219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3813219089Spjd uint64_t chunksize = blocksize; 3814209962Smm uint64_t regions = 997; 3815209962Smm uint64_t stride = 123456789ULL; 3816209962Smm uint64_t width = 9; 3817209962Smm dmu_buf_t *bonus_db; 3818209962Smm arc_buf_t **bigbuf_arcbufs; 3819219089Spjd dmu_object_info_t doi; 3820209962Smm 3821209962Smm /* 3822209962Smm * This test uses two objects, packobj and bigobj, that are always 3823209962Smm * updated together (i.e. in the same tx) so that their contents are 3824209962Smm * in sync and can be compared. Their contents relate to each other 3825209962Smm * in a simple way: packobj is a dense array of 'bufwad' structures, 3826209962Smm * while bigobj is a sparse array of the same bufwads. Specifically, 3827209962Smm * for any index n, there are three bufwads that should be identical: 3828209962Smm * 3829209962Smm * packobj, at offset n * sizeof (bufwad_t) 3830209962Smm * bigobj, at the head of the nth chunk 3831209962Smm * bigobj, at the tail of the nth chunk 3832209962Smm * 3833209962Smm * The chunk size is set equal to bigobj block size so that 3834209962Smm * dmu_assign_arcbuf() can be tested for object updates. 3835209962Smm */ 3836209962Smm 3837209962Smm /* 3838209962Smm * Read the directory info. If it's the first time, set things up. 
3839209962Smm */ 3840219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3841219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3842209962Smm 3843219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3844219089Spjd return; 3845209962Smm 3846219089Spjd bigobj = od[0].od_object; 3847219089Spjd packobj = od[1].od_object; 3848219089Spjd blocksize = od[0].od_blocksize; 3849219089Spjd chunksize = blocksize; 3850219089Spjd ASSERT(chunksize == od[1].od_gen); 3851209962Smm 3852219089Spjd VERIFY(dmu_object_info(os, bigobj, &doi) == 0); 3853219089Spjd VERIFY(ISP2(doi.doi_data_block_size)); 3854219089Spjd VERIFY(chunksize == doi.doi_data_block_size); 3855219089Spjd VERIFY(chunksize >= 2 * sizeof (bufwad_t)); 3856209962Smm 3857209962Smm /* 3858209962Smm * Pick a random index and compute the offsets into packobj and bigobj. 3859209962Smm */ 3860209962Smm n = ztest_random(regions) * stride + ztest_random(width); 3861209962Smm s = 1 + ztest_random(width - 1); 3862209962Smm 3863209962Smm packoff = n * sizeof (bufwad_t); 3864209962Smm packsize = s * sizeof (bufwad_t); 3865209962Smm 3866219089Spjd bigoff = n * chunksize; 3867219089Spjd bigsize = s * chunksize; 3868209962Smm 3869209962Smm packbuf = umem_zalloc(packsize, UMEM_NOFAIL); 3870209962Smm bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); 3871209962Smm 3872219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); 3873209962Smm 3874209962Smm bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); 3875209962Smm 3876209962Smm /* 3877209962Smm * Iteration 0 test zcopy for DB_UNCACHED dbufs. 3878209962Smm * Iteration 1 test zcopy to already referenced dbufs. 3879209962Smm * Iteration 2 test zcopy to dirty dbuf in the same txg. 3880209962Smm * Iteration 3 test zcopy to dbuf dirty in previous txg. 3881209962Smm * Iteration 4 test zcopy when dbuf is no longer dirty. 3882209962Smm * Iteration 5 test zcopy when it can't be done. 
3883209962Smm * Iteration 6 one more zcopy write. 3884209962Smm */ 3885209962Smm for (i = 0; i < 7; i++) { 3886209962Smm uint64_t j; 3887209962Smm uint64_t off; 3888209962Smm 3889209962Smm /* 3890209962Smm * In iteration 5 (i == 5) use arcbufs 3891209962Smm * that don't match bigobj blksz to test 3892209962Smm * dmu_assign_arcbuf() when it can't directly 3893209962Smm * assign an arcbuf to a dbuf. 3894209962Smm */ 3895209962Smm for (j = 0; j < s; j++) { 3896209962Smm if (i != 5) { 3897209962Smm bigbuf_arcbufs[j] = 3898219089Spjd dmu_request_arcbuf(bonus_db, chunksize); 3899209962Smm } else { 3900209962Smm bigbuf_arcbufs[2 * j] = 3901219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3902209962Smm bigbuf_arcbufs[2 * j + 1] = 3903219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3904209962Smm } 3905209962Smm } 3906209962Smm 3907209962Smm /* 3908209962Smm * Get a tx for the mods to both packobj and bigobj. 3909209962Smm */ 3910209962Smm tx = dmu_tx_create(os); 3911209962Smm 3912219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3913219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3914209962Smm 3915219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3916219089Spjd if (txg == 0) { 3917209962Smm umem_free(packbuf, packsize); 3918209962Smm umem_free(bigbuf, bigsize); 3919209962Smm for (j = 0; j < s; j++) { 3920209962Smm if (i != 5) { 3921209962Smm dmu_return_arcbuf(bigbuf_arcbufs[j]); 3922209962Smm } else { 3923209962Smm dmu_return_arcbuf( 3924209962Smm bigbuf_arcbufs[2 * j]); 3925209962Smm dmu_return_arcbuf( 3926209962Smm bigbuf_arcbufs[2 * j + 1]); 3927209962Smm } 3928209962Smm } 3929209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3930209962Smm dmu_buf_rele(bonus_db, FTAG); 3931209962Smm return; 3932209962Smm } 3933209962Smm 3934209962Smm /* 3935209962Smm * 50% of the time don't read objects in the 1st iteration to 3936209962Smm * test dmu_assign_arcbuf() for the case when there're no 3937209962Smm * existing 
dbufs for the specified offsets. 3938209962Smm */ 3939209962Smm if (i != 0 || ztest_random(2) != 0) { 3940219089Spjd error = dmu_read(os, packobj, packoff, 3941209962Smm packsize, packbuf, DMU_READ_PREFETCH); 3942240415Smm ASSERT0(error); 3943219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, 3944209962Smm bigbuf, DMU_READ_PREFETCH); 3945240415Smm ASSERT0(error); 3946209962Smm } 3947209962Smm compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, 3948219089Spjd n, chunksize, txg); 3949209962Smm 3950209962Smm /* 3951209962Smm * We've verified all the old bufwads, and made new ones. 3952209962Smm * Now write them out. 3953209962Smm */ 3954219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3955236143Smm if (ztest_opts.zo_verbose >= 7) { 3956209962Smm (void) printf("writing offset %llx size %llx" 3957209962Smm " txg %llx\n", 3958209962Smm (u_longlong_t)bigoff, 3959209962Smm (u_longlong_t)bigsize, 3960209962Smm (u_longlong_t)txg); 3961209962Smm } 3962219089Spjd for (off = bigoff, j = 0; j < s; j++, off += chunksize) { 3963209962Smm dmu_buf_t *dbt; 3964209962Smm if (i != 5) { 3965209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3966219089Spjd bigbuf_arcbufs[j]->b_data, chunksize); 3967209962Smm } else { 3968209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3969209962Smm bigbuf_arcbufs[2 * j]->b_data, 3970219089Spjd chunksize / 2); 3971209962Smm bcopy((caddr_t)bigbuf + (off - bigoff) + 3972219089Spjd chunksize / 2, 3973209962Smm bigbuf_arcbufs[2 * j + 1]->b_data, 3974219089Spjd chunksize / 2); 3975209962Smm } 3976209962Smm 3977209962Smm if (i == 1) { 3978219089Spjd VERIFY(dmu_buf_hold(os, bigobj, off, 3979219089Spjd FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); 3980209962Smm } 3981209962Smm if (i != 5) { 3982209962Smm dmu_assign_arcbuf(bonus_db, off, 3983209962Smm bigbuf_arcbufs[j], tx); 3984209962Smm } else { 3985209962Smm dmu_assign_arcbuf(bonus_db, off, 3986209962Smm bigbuf_arcbufs[2 * j], tx); 3987209962Smm dmu_assign_arcbuf(bonus_db, 3988219089Spjd 
off + chunksize / 2, 3989209962Smm bigbuf_arcbufs[2 * j + 1], tx); 3990209962Smm } 3991209962Smm if (i == 1) { 3992209962Smm dmu_buf_rele(dbt, FTAG); 3993209962Smm } 3994209962Smm } 3995209962Smm dmu_tx_commit(tx); 3996209962Smm 3997209962Smm /* 3998209962Smm * Sanity check the stuff we just wrote. 3999209962Smm */ 4000209962Smm { 4001209962Smm void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 4002209962Smm void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 4003209962Smm 4004219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 4005209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 4006219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 4007209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 4008209962Smm 4009209962Smm ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 4010209962Smm ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 4011209962Smm 4012209962Smm umem_free(packcheck, packsize); 4013209962Smm umem_free(bigcheck, bigsize); 4014209962Smm } 4015209962Smm if (i == 2) { 4016209962Smm txg_wait_open(dmu_objset_pool(os), 0); 4017209962Smm } else if (i == 3) { 4018209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 4019209962Smm } 4020209962Smm } 4021209962Smm 4022209962Smm dmu_buf_rele(bonus_db, FTAG); 4023209962Smm umem_free(packbuf, packsize); 4024209962Smm umem_free(bigbuf, bigsize); 4025209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 4026209962Smm} 4027209962Smm 4028219089Spjd/* ARGSUSED */ 4029209962Smmvoid 4030219089Spjdztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) 4031168404Spjd{ 4032219089Spjd ztest_od_t od[1]; 4033219089Spjd uint64_t offset = (1ULL << (ztest_random(20) + 43)) + 4034219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4035168404Spjd 4036168404Spjd /* 4037219089Spjd * Have multiple threads write to large offsets in an object 4038219089Spjd * to verify that parallel writes to an object -- even to the 4039219089Spjd * same blocks within the object -- doesn't cause any trouble. 
4040168404Spjd */ 4041219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4042219089Spjd 4043219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4044219089Spjd return; 4045219089Spjd 4046219089Spjd while (ztest_random(10) != 0) 4047219089Spjd ztest_io(zd, od[0].od_object, offset); 4048168404Spjd} 4049168404Spjd 4050168404Spjdvoid 4051219089Spjdztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) 4052168404Spjd{ 4053219089Spjd ztest_od_t od[1]; 4054219089Spjd uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + 4055219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4056219089Spjd uint64_t count = ztest_random(20) + 1; 4057219089Spjd uint64_t blocksize = ztest_random_blocksize(); 4058219089Spjd void *data; 4059168404Spjd 4060219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4061168404Spjd 4062219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4063185029Spjd return; 4064168404Spjd 4065219089Spjd if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) 4066185029Spjd return; 4067168404Spjd 4068219089Spjd ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); 4069185029Spjd 4070219089Spjd data = umem_zalloc(blocksize, UMEM_NOFAIL); 4071185029Spjd 4072219089Spjd while (ztest_random(count) != 0) { 4073219089Spjd uint64_t randoff = offset + (ztest_random(count) * blocksize); 4074219089Spjd if (ztest_write(zd, od[0].od_object, randoff, blocksize, 4075219089Spjd data) != 0) 4076219089Spjd break; 4077219089Spjd while (ztest_random(4) != 0) 4078219089Spjd ztest_io(zd, od[0].od_object, randoff); 4079185029Spjd } 4080168404Spjd 4081219089Spjd umem_free(data, blocksize); 4082168404Spjd} 4083168404Spjd 4084168404Spjd/* 4085168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 
4086168404Spjd */ 4087168404Spjd#define ZTEST_ZAP_MIN_INTS 1 4088168404Spjd#define ZTEST_ZAP_MAX_INTS 4 4089168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 4090168404Spjd 4091168404Spjdvoid 4092219089Spjdztest_zap(ztest_ds_t *zd, uint64_t id) 4093168404Spjd{ 4094219089Spjd objset_t *os = zd->zd_os; 4095219089Spjd ztest_od_t od[1]; 4096168404Spjd uint64_t object; 4097168404Spjd uint64_t txg, last_txg; 4098168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 4099168404Spjd uint64_t zl_ints, zl_intsize, prop; 4100168404Spjd int i, ints; 4101168404Spjd dmu_tx_t *tx; 4102168404Spjd char propname[100], txgname[100]; 4103168404Spjd int error; 4104168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 4105168404Spjd 4106219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4107168404Spjd 4108219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4109219089Spjd return; 4110219089Spjd 4111219089Spjd object = od[0].od_object; 4112219089Spjd 4113168404Spjd /* 4114219089Spjd * Generate a known hash collision, and verify that 4115219089Spjd * we can lookup and remove both entries. 
4116168404Spjd */ 4117219089Spjd tx = dmu_tx_create(os); 4118219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4119219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4120219089Spjd if (txg == 0) 4121219089Spjd return; 4122219089Spjd for (i = 0; i < 2; i++) { 4123219089Spjd value[i] = i; 4124219089Spjd VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 4125219089Spjd 1, &value[i], tx)); 4126168404Spjd } 4127219089Spjd for (i = 0; i < 2; i++) { 4128219089Spjd VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], 4129219089Spjd sizeof (uint64_t), 1, &value[i], tx)); 4130219089Spjd VERIFY3U(0, ==, 4131219089Spjd zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); 4132219089Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4133219089Spjd ASSERT3U(zl_ints, ==, 1); 4134219089Spjd } 4135219089Spjd for (i = 0; i < 2; i++) { 4136219089Spjd VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); 4137219089Spjd } 4138219089Spjd dmu_tx_commit(tx); 4139168404Spjd 4140219089Spjd /* 4141219089Spjd * Generate a buch of random entries. 4142219089Spjd */ 4143168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 4144168404Spjd 4145185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4146185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4147185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4148185029Spjd bzero(value, sizeof (value)); 4149185029Spjd last_txg = 0; 4150168404Spjd 4151185029Spjd /* 4152185029Spjd * If these zap entries already exist, validate their contents. 
4153185029Spjd */ 4154185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4155185029Spjd if (error == 0) { 4156185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4157185029Spjd ASSERT3U(zl_ints, ==, 1); 4158168404Spjd 4159185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 4160185029Spjd zl_ints, &last_txg) == 0); 4161168404Spjd 4162185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 4163185029Spjd &zl_ints) == 0); 4164168404Spjd 4165185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4166185029Spjd ASSERT3U(zl_ints, ==, ints); 4167168404Spjd 4168185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 4169185029Spjd zl_ints, value) == 0); 4170168404Spjd 4171185029Spjd for (i = 0; i < ints; i++) { 4172185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 4173168404Spjd } 4174185029Spjd } else { 4175185029Spjd ASSERT3U(error, ==, ENOENT); 4176185029Spjd } 4177168404Spjd 4178185029Spjd /* 4179185029Spjd * Atomically update two entries in our zap object. 4180185029Spjd * The first is named txg_%llu, and contains the txg 4181185029Spjd * in which the property was last updated. The second 4182185029Spjd * is named prop_%llu, and the nth element of its value 4183185029Spjd * should be txg + object + n. 
4184185029Spjd */ 4185185029Spjd tx = dmu_tx_create(os); 4186219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4187219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4188219089Spjd if (txg == 0) 4189185029Spjd return; 4190168404Spjd 4191185029Spjd if (last_txg > txg) 4192185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 4193168404Spjd 4194185029Spjd for (i = 0; i < ints; i++) 4195185029Spjd value[i] = txg + object + i; 4196168404Spjd 4197219089Spjd VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 4198219089Spjd 1, &txg, tx)); 4199219089Spjd VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), 4200219089Spjd ints, value, tx)); 4201168404Spjd 4202185029Spjd dmu_tx_commit(tx); 4203168404Spjd 4204185029Spjd /* 4205185029Spjd * Remove a random pair of entries. 4206185029Spjd */ 4207185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4208185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4209185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4210168404Spjd 4211185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4212168404Spjd 4213185029Spjd if (error == ENOENT) 4214185029Spjd return; 4215168404Spjd 4216240415Smm ASSERT0(error); 4217168404Spjd 4218185029Spjd tx = dmu_tx_create(os); 4219219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4220219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4221219089Spjd if (txg == 0) 4222185029Spjd return; 4223219089Spjd VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); 4224219089Spjd VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); 4225185029Spjd dmu_tx_commit(tx); 4226168404Spjd} 4227168404Spjd 4228209962Smm/* 4229209962Smm * Testcase to test the upgrading of a microzap to fatzap. 
4230209962Smm */ 4231168404Spjdvoid 4232219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4233209962Smm{ 4234219089Spjd objset_t *os = zd->zd_os; 4235219089Spjd ztest_od_t od[1]; 4236219089Spjd uint64_t object, txg; 4237209962Smm 4238219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4239209962Smm 4240219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4241219089Spjd return; 4242209962Smm 4243219089Spjd object = od[0].od_object; 4244209962Smm 4245209962Smm /* 4246219089Spjd * Add entries to this ZAP and make sure it spills over 4247209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4248219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4249209962Smm */ 4250219089Spjd for (int i = 0; i < 2050; i++) { 4251219089Spjd char name[MAXNAMELEN]; 4252219089Spjd uint64_t value = i; 4253219089Spjd dmu_tx_t *tx; 4254219089Spjd int error; 4255209962Smm 4256219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4257219089Spjd id, value); 4258219089Spjd 4259209962Smm tx = dmu_tx_create(os); 4260219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4261219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4262219089Spjd if (txg == 0) 4263209962Smm return; 4264219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4265219089Spjd &value, tx); 4266209962Smm ASSERT(error == 0 || error == EEXIST); 4267209962Smm dmu_tx_commit(tx); 4268209962Smm } 4269209962Smm} 4270209962Smm 4271219089Spjd/* ARGSUSED */ 4272209962Smmvoid 4273219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4274168404Spjd{ 4275219089Spjd objset_t *os = zd->zd_os; 4276219089Spjd ztest_od_t od[1]; 4277168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4278168404Spjd dmu_tx_t *tx; 4279168404Spjd int i, namelen, error; 4280219089Spjd int micro = ztest_random(2); 4281168404Spjd char name[20], string_value[20]; 4282168404Spjd void *data; 4283168404Spjd 4284219089Spjd 
ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4285219089Spjd 4286219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4287219089Spjd return; 4288219089Spjd 4289219089Spjd object = od[0].od_object; 4290219089Spjd 4291185029Spjd /* 4292185029Spjd * Generate a random name of the form 'xxx.....' where each 4293185029Spjd * x is a random printable character and the dots are dots. 4294185029Spjd * There are 94 such characters, and the name length goes from 4295185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4296185029Spjd */ 4297185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4298168404Spjd 4299185029Spjd for (i = 0; i < 3; i++) 4300185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4301185029Spjd for (; i < namelen - 1; i++) 4302185029Spjd name[i] = '.'; 4303185029Spjd name[i] = '\0'; 4304168404Spjd 4305219089Spjd if ((namelen & 1) || micro) { 4306185029Spjd wsize = sizeof (txg); 4307185029Spjd wc = 1; 4308185029Spjd data = &txg; 4309185029Spjd } else { 4310185029Spjd wsize = 1; 4311185029Spjd wc = namelen; 4312185029Spjd data = string_value; 4313185029Spjd } 4314168404Spjd 4315185029Spjd count = -1ULL; 4316248571Smm VERIFY0(zap_count(os, object, &count)); 4317185029Spjd ASSERT(count != -1ULL); 4318168404Spjd 4319185029Spjd /* 4320185029Spjd * Select an operation: length, lookup, add, update, remove. 
4321185029Spjd */ 4322185029Spjd i = ztest_random(5); 4323168404Spjd 4324185029Spjd if (i >= 2) { 4325185029Spjd tx = dmu_tx_create(os); 4326219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4327219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4328219089Spjd if (txg == 0) 4329185029Spjd return; 4330185029Spjd bcopy(name, string_value, namelen); 4331185029Spjd } else { 4332185029Spjd tx = NULL; 4333185029Spjd txg = 0; 4334185029Spjd bzero(string_value, namelen); 4335185029Spjd } 4336168404Spjd 4337185029Spjd switch (i) { 4338168404Spjd 4339185029Spjd case 0: 4340185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4341185029Spjd if (error == 0) { 4342185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4343185029Spjd ASSERT3U(wc, ==, zl_wc); 4344185029Spjd } else { 4345185029Spjd ASSERT3U(error, ==, ENOENT); 4346185029Spjd } 4347185029Spjd break; 4348168404Spjd 4349185029Spjd case 1: 4350185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4351185029Spjd if (error == 0) { 4352185029Spjd if (data == string_value && 4353185029Spjd bcmp(name, data, namelen) != 0) 4354185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4355185029Spjd name, data, namelen); 4356185029Spjd } else { 4357185029Spjd ASSERT3U(error, ==, ENOENT); 4358185029Spjd } 4359185029Spjd break; 4360168404Spjd 4361185029Spjd case 2: 4362185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4363185029Spjd ASSERT(error == 0 || error == EEXIST); 4364185029Spjd break; 4365168404Spjd 4366185029Spjd case 3: 4367185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4368185029Spjd break; 4369168404Spjd 4370185029Spjd case 4: 4371185029Spjd error = zap_remove(os, object, name, tx); 4372185029Spjd ASSERT(error == 0 || error == ENOENT); 4373185029Spjd break; 4374185029Spjd } 4375168404Spjd 4376185029Spjd if (tx != NULL) 4377185029Spjd dmu_tx_commit(tx); 4378168404Spjd} 4379168404Spjd 4380219089Spjd/* 4381219089Spjd * Commit callback data. 
4382219089Spjd */ 4383219089Spjdtypedef struct ztest_cb_data { 4384219089Spjd list_node_t zcd_node; 4385219089Spjd uint64_t zcd_txg; 4386219089Spjd int zcd_expected_err; 4387219089Spjd boolean_t zcd_added; 4388219089Spjd boolean_t zcd_called; 4389219089Spjd spa_t *zcd_spa; 4390219089Spjd} ztest_cb_data_t; 4391219089Spjd 4392219089Spjd/* This is the actual commit callback function */ 4393219089Spjdstatic void 4394219089Spjdztest_commit_callback(void *arg, int error) 4395219089Spjd{ 4396219089Spjd ztest_cb_data_t *data = arg; 4397219089Spjd uint64_t synced_txg; 4398219089Spjd 4399219089Spjd VERIFY(data != NULL); 4400219089Spjd VERIFY3S(data->zcd_expected_err, ==, error); 4401219089Spjd VERIFY(!data->zcd_called); 4402219089Spjd 4403219089Spjd synced_txg = spa_last_synced_txg(data->zcd_spa); 4404219089Spjd if (data->zcd_txg > synced_txg) 4405219089Spjd fatal(0, "commit callback of txg %" PRIu64 " called prematurely" 4406219089Spjd ", last synced txg = %" PRIu64 "\n", data->zcd_txg, 4407219089Spjd synced_txg); 4408219089Spjd 4409219089Spjd data->zcd_called = B_TRUE; 4410219089Spjd 4411219089Spjd if (error == ECANCELED) { 4412240415Smm ASSERT0(data->zcd_txg); 4413219089Spjd ASSERT(!data->zcd_added); 4414219089Spjd 4415219089Spjd /* 4416219089Spjd * The private callback data should be destroyed here, but 4417219089Spjd * since we are going to check the zcd_called field after 4418219089Spjd * dmu_tx_abort(), we will destroy it there. 4419219089Spjd */ 4420219089Spjd return; 4421219089Spjd } 4422219089Spjd 4423219089Spjd /* Was this callback added to the global callback list? 
*/ 4424219089Spjd if (!data->zcd_added) 4425219089Spjd goto out; 4426219089Spjd 4427219089Spjd ASSERT3U(data->zcd_txg, !=, 0); 4428219089Spjd 4429219089Spjd /* Remove our callback from the list */ 4430219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4431219089Spjd list_remove(&zcl.zcl_callbacks, data); 4432219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4433219089Spjd 4434219089Spjdout: 4435219089Spjd umem_free(data, sizeof (ztest_cb_data_t)); 4436219089Spjd} 4437219089Spjd 4438219089Spjd/* Allocate and initialize callback data structure */ 4439219089Spjdstatic ztest_cb_data_t * 4440219089Spjdztest_create_cb_data(objset_t *os, uint64_t txg) 4441219089Spjd{ 4442219089Spjd ztest_cb_data_t *cb_data; 4443219089Spjd 4444219089Spjd cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); 4445219089Spjd 4446219089Spjd cb_data->zcd_txg = txg; 4447219089Spjd cb_data->zcd_spa = dmu_objset_spa(os); 4448219089Spjd 4449219089Spjd return (cb_data); 4450219089Spjd} 4451219089Spjd 4452219089Spjd/* 4453219089Spjd * If a number of txgs equal to this threshold have been created after a commit 4454219089Spjd * callback has been registered but not called, then we assume there is an 4455219089Spjd * implementation bug. 4456219089Spjd */ 4457219089Spjd#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) 4458219089Spjd 4459219089Spjd/* 4460219089Spjd * Commit callback test. 
4461219089Spjd */ 4462168404Spjdvoid 4463219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4464168404Spjd{ 4465219089Spjd objset_t *os = zd->zd_os; 4466219089Spjd ztest_od_t od[1]; 4467219089Spjd dmu_tx_t *tx; 4468219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4469219089Spjd uint64_t old_txg, txg; 4470219089Spjd int i, error; 4471219089Spjd 4472219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4473219089Spjd 4474219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4475219089Spjd return; 4476219089Spjd 4477219089Spjd tx = dmu_tx_create(os); 4478219089Spjd 4479219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4480219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4481219089Spjd 4482219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4483219089Spjd 4484219089Spjd /* Every once in a while, abort the transaction on purpose */ 4485219089Spjd if (ztest_random(100) == 0) 4486219089Spjd error = -1; 4487219089Spjd 4488219089Spjd if (!error) 4489219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4490219089Spjd 4491219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4492219089Spjd 4493219089Spjd cb_data[0]->zcd_txg = txg; 4494219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4495219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4496219089Spjd 4497219089Spjd if (error) { 4498219089Spjd /* 4499219089Spjd * It's not a strict requirement to call the registered 4500219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4501219089Spjd * it's supposed to happen in the current implementation 4502219089Spjd * so we will check for that. 
4503219089Spjd */ 4504219089Spjd for (i = 0; i < 2; i++) { 4505219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4506219089Spjd VERIFY(!cb_data[i]->zcd_called); 4507219089Spjd } 4508219089Spjd 4509219089Spjd dmu_tx_abort(tx); 4510219089Spjd 4511219089Spjd for (i = 0; i < 2; i++) { 4512219089Spjd VERIFY(cb_data[i]->zcd_called); 4513219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4514219089Spjd } 4515219089Spjd 4516219089Spjd return; 4517219089Spjd } 4518219089Spjd 4519219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4520219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4521219089Spjd 4522219089Spjd /* 4523219089Spjd * Read existing data to make sure there isn't a future leak. 4524219089Spjd */ 4525219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4526219089Spjd &old_txg, DMU_READ_PREFETCH)); 4527219089Spjd 4528219089Spjd if (old_txg > txg) 4529219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4530219089Spjd old_txg, txg); 4531219089Spjd 4532219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4533219089Spjd 4534219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4535219089Spjd 4536219089Spjd /* 4537219089Spjd * Since commit callbacks don't have any ordering requirement and since 4538219089Spjd * it is theoretically possible for a commit callback to be called 4539219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4540219089Spjd * synced, it is difficult to reliably determine whether a commit 4541219089Spjd * callback hasn't been called due to high load or due to a flawed 4542219089Spjd * implementation. 4543219089Spjd * 4544219089Spjd * In practice, we will assume that if after a certain number of txgs a 4545219089Spjd * commit callback hasn't been called, then most likely there's an 4546219089Spjd * implementation bug.. 
4547219089Spjd */ 4548219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4549219089Spjd if (tmp_cb != NULL && 4550251635Sdelphij (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { 4551219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4552219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4553219089Spjd } 4554219089Spjd 4555219089Spjd /* 4556219089Spjd * Let's find the place to insert our callbacks. 4557219089Spjd * 4558219089Spjd * Even though the list is ordered by txg, it is possible for the 4559219089Spjd * insertion point to not be the end because our txg may already be 4560219089Spjd * quiescing at this point and other callbacks in the open txg 4561219089Spjd * (from other objsets) may have sneaked in. 4562219089Spjd */ 4563219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4564219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4565219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4566219089Spjd 4567219089Spjd /* Add the 3 callbacks to the list */ 4568219089Spjd for (i = 0; i < 3; i++) { 4569219089Spjd if (tmp_cb == NULL) 4570219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4571219089Spjd else 4572219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4573219089Spjd cb_data[i]); 4574219089Spjd 4575219089Spjd cb_data[i]->zcd_added = B_TRUE; 4576219089Spjd VERIFY(!cb_data[i]->zcd_called); 4577219089Spjd 4578219089Spjd tmp_cb = cb_data[i]; 4579219089Spjd } 4580219089Spjd 4581219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4582219089Spjd 4583219089Spjd dmu_tx_commit(tx); 4584219089Spjd} 4585219089Spjd 4586219089Spjd/* ARGSUSED */ 4587219089Spjdvoid 4588219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4589219089Spjd{ 4590219089Spjd zfs_prop_t proplist[] = { 4591219089Spjd ZFS_PROP_CHECKSUM, 4592219089Spjd ZFS_PROP_COMPRESSION, 4593219089Spjd ZFS_PROP_COPIES, 4594219089Spjd ZFS_PROP_DEDUP 4595219089Spjd }; 4596219089Spjd 4597236143Smm (void) 
rw_rdlock(&ztest_name_lock); 4598219089Spjd 4599219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4600219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4601219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4602219089Spjd 4603236143Smm (void) rw_unlock(&ztest_name_lock); 4604219089Spjd} 4605219089Spjd 4606219089Spjd/* ARGSUSED */ 4607219089Spjdvoid 4608219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4609219089Spjd{ 4610219089Spjd nvlist_t *props = NULL; 4611219089Spjd 4612236143Smm (void) rw_rdlock(&ztest_name_lock); 4613219089Spjd 4614236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4615219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4616219089Spjd 4617240415Smm VERIFY0(spa_prop_get(ztest_spa, &props)); 4618219089Spjd 4619236143Smm if (ztest_opts.zo_verbose >= 6) 4620219089Spjd dump_nvlist(props, 4); 4621219089Spjd 4622219089Spjd nvlist_free(props); 4623219089Spjd 4624236143Smm (void) rw_unlock(&ztest_name_lock); 4625219089Spjd} 4626219089Spjd 4627248571Smmstatic int 4628248571Smmuser_release_one(const char *snapname, const char *holdname) 4629248571Smm{ 4630248571Smm nvlist_t *snaps, *holds; 4631248571Smm int error; 4632248571Smm 4633248571Smm snaps = fnvlist_alloc(); 4634248571Smm holds = fnvlist_alloc(); 4635248571Smm fnvlist_add_boolean(holds, holdname); 4636248571Smm fnvlist_add_nvlist(snaps, snapname, holds); 4637248571Smm fnvlist_free(holds); 4638248571Smm error = dsl_dataset_user_release(snaps, NULL); 4639248571Smm fnvlist_free(snaps); 4640248571Smm return (error); 4641248571Smm} 4642248571Smm 4643219089Spjd/* 4644219089Spjd * Test snapshot hold/release and deferred destroy. 
4645219089Spjd */ 4646219089Spjdvoid 4647219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4648219089Spjd{ 4649219089Spjd int error; 4650219089Spjd objset_t *os = zd->zd_os; 4651219089Spjd objset_t *origin; 4652219089Spjd char snapname[100]; 4653219089Spjd char fullname[100]; 4654219089Spjd char clonename[100]; 4655219089Spjd char tag[100]; 4656168404Spjd char osname[MAXNAMELEN]; 4657248571Smm nvlist_t *holds; 4658168404Spjd 4659236143Smm (void) rw_rdlock(&ztest_name_lock); 4660168404Spjd 4661168404Spjd dmu_objset_name(os, osname); 4662168404Spjd 4663248571Smm (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); 4664248571Smm (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); 4665248571Smm (void) snprintf(clonename, sizeof (clonename), 4666248571Smm "%s/ch1_%llu", osname, id); 4667248571Smm (void) snprintf(tag, sizeof (tag), "tag_%llu", id); 4668219089Spjd 4669219089Spjd /* 4670219089Spjd * Clean up from any previous run. 4671219089Spjd */ 4672248571Smm error = dsl_destroy_head(clonename); 4673248571Smm if (error != ENOENT) 4674248571Smm ASSERT0(error); 4675248571Smm error = user_release_one(fullname, tag); 4676248571Smm if (error != ESRCH && error != ENOENT) 4677248571Smm ASSERT0(error); 4678248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4679248571Smm if (error != ENOENT) 4680248571Smm ASSERT0(error); 4681219089Spjd 4682219089Spjd /* 4683219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4684219089Spjd * destroy clone, verify snap was also destroyed. 
4685219089Spjd */ 4686248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4687219089Spjd if (error) { 4688219089Spjd if (error == ENOSPC) { 4689219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4690219089Spjd goto out; 4691168404Spjd } 4692219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4693219089Spjd } 4694168404Spjd 4695248571Smm error = dmu_objset_clone(clonename, fullname); 4696219089Spjd if (error) { 4697168404Spjd if (error == ENOSPC) { 4698219089Spjd ztest_record_enospc("dmu_objset_clone"); 4699219089Spjd goto out; 4700168404Spjd } 4701219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4702219089Spjd } 4703168404Spjd 4704248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4705219089Spjd if (error) { 4706248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4707219089Spjd fullname, error); 4708219089Spjd } 4709168404Spjd 4710248571Smm error = dsl_destroy_head(clonename); 4711219089Spjd if (error) 4712248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); 4713168404Spjd 4714219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4715219089Spjd if (error != ENOENT) 4716219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4717168404Spjd 4718219089Spjd /* 4719219089Spjd * Create snapshot, add temporary hold, verify that we can't 4720219089Spjd * destroy a held snapshot, mark for deferred destroy, 4721219089Spjd * release hold, verify snapshot was destroyed. 
4722219089Spjd */ 4723248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4724219089Spjd if (error) { 4725219089Spjd if (error == ENOSPC) { 4726219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4727219089Spjd goto out; 4728168404Spjd } 4729219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4730168404Spjd } 4731168404Spjd 4732248571Smm holds = fnvlist_alloc(); 4733248571Smm fnvlist_add_string(holds, fullname, tag); 4734248571Smm error = dsl_dataset_user_hold(holds, 0, NULL); 4735248571Smm fnvlist_free(holds); 4736248571Smm 4737268075Sdelphij if (error == ENOSPC) { 4738268075Sdelphij ztest_record_enospc("dsl_dataset_user_hold"); 4739268075Sdelphij goto out; 4740268075Sdelphij } else if (error) { 4741268075Sdelphij fatal(0, "dsl_dataset_user_hold(%s, %s) = %u", 4742268075Sdelphij fullname, tag, error); 4743268075Sdelphij } 4744219089Spjd 4745248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4746219089Spjd if (error != EBUSY) { 4747248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", 4748219089Spjd fullname, error); 4749219089Spjd } 4750219089Spjd 4751248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4752219089Spjd if (error) { 4753248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4754219089Spjd fullname, error); 4755219089Spjd } 4756219089Spjd 4757248571Smm error = user_release_one(fullname, tag); 4758219089Spjd if (error) 4759251646Sdelphij fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error); 4760219089Spjd 4761248571Smm VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); 4762219089Spjd 4763219089Spjdout: 4764236143Smm (void) rw_unlock(&ztest_name_lock); 4765168404Spjd} 4766168404Spjd 4767168404Spjd/* 4768168404Spjd * Inject random faults into the on-disk data. 
4769168404Spjd */ 4770219089Spjd/* ARGSUSED */ 4771168404Spjdvoid 4772219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 4773168404Spjd{ 4774219089Spjd ztest_shared_t *zs = ztest_shared; 4775236143Smm spa_t *spa = ztest_spa; 4776168404Spjd int fd; 4777168404Spjd uint64_t offset; 4778219089Spjd uint64_t leaves; 4779168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 4780168404Spjd uint64_t top, leaf; 4781168404Spjd char path0[MAXPATHLEN]; 4782168404Spjd char pathrand[MAXPATHLEN]; 4783168404Spjd size_t fsize; 4784274337Sdelphij int bshift = SPA_OLD_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 4785168404Spjd int iters = 1000; 4786219089Spjd int maxfaults; 4787219089Spjd int mirror_save; 4788185029Spjd vdev_t *vd0 = NULL; 4789168404Spjd uint64_t guid0 = 0; 4790219089Spjd boolean_t islog = B_FALSE; 4791168404Spjd 4792236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4793219089Spjd maxfaults = MAXFAULTS(); 4794236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 4795219089Spjd mirror_save = zs->zs_mirrors; 4796236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4797219089Spjd 4798185029Spjd ASSERT(leaves >= 1); 4799168404Spjd 4800168404Spjd /* 4801254074Sdelphij * Grab the name lock as reader. There are some operations 4802254074Sdelphij * which don't like to have their vdevs changed while 4803254074Sdelphij * they are in progress (i.e. spa_change_guid). Those 4804254074Sdelphij * operations will have grabbed the name lock as writer. 4805254074Sdelphij */ 4806254074Sdelphij (void) rw_rdlock(&ztest_name_lock); 4807254074Sdelphij 4808254074Sdelphij /* 4809185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 4810168404Spjd */ 4811185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4812168404Spjd 4813185029Spjd if (ztest_random(2) == 0) { 4814185029Spjd /* 4815219089Spjd * Inject errors on a normal data device or slog device. 
4816185029Spjd */ 4817219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 4818219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 4819168404Spjd 4820185029Spjd /* 4821185029Spjd * Generate paths to the first leaf in this top-level vdev, 4822185029Spjd * and to the random leaf we selected. We'll induce transient 4823185029Spjd * write failures and random online/offline activity on leaf 0, 4824185029Spjd * and we'll write random garbage to the randomly chosen leaf. 4825185029Spjd */ 4826185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 4827236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4828236143Smm top * leaves + zs->zs_splits); 4829185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 4830236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4831236143Smm top * leaves + leaf); 4832168404Spjd 4833185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 4834219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 4835219089Spjd islog = B_TRUE; 4836219089Spjd 4837254074Sdelphij /* 4838254074Sdelphij * If the top-level vdev needs to be resilvered 4839254074Sdelphij * then we only allow faults on the device that is 4840254074Sdelphij * resilvering. 4841254074Sdelphij */ 4842254074Sdelphij if (vd0 != NULL && maxfaults != 1 && 4843254074Sdelphij (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || 4844254112Sdelphij vd0->vdev_resilver_txg != 0)) { 4845185029Spjd /* 4846185029Spjd * Make vd0 explicitly claim to be unreadable, 4847185029Spjd * or unwriteable, or reach behind its back 4848185029Spjd * and close the underlying fd. We can do this if 4849185029Spjd * maxfaults == 0 because we'll fail and reexecute, 4850185029Spjd * and we can do it if maxfaults >= 2 because we'll 4851185029Spjd * have enough redundancy. If maxfaults == 1, the 4852185029Spjd * combination of this with injection of random data 4853185029Spjd * corruption below exceeds the pool's fault tolerance. 
4854185029Spjd */ 4855185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 4856168404Spjd 4857185029Spjd if (vf != NULL && ztest_random(3) == 0) { 4858185029Spjd (void) close(vf->vf_vnode->v_fd); 4859185029Spjd vf->vf_vnode->v_fd = -1; 4860185029Spjd } else if (ztest_random(2) == 0) { 4861185029Spjd vd0->vdev_cant_read = B_TRUE; 4862185029Spjd } else { 4863185029Spjd vd0->vdev_cant_write = B_TRUE; 4864185029Spjd } 4865185029Spjd guid0 = vd0->vdev_guid; 4866185029Spjd } 4867185029Spjd } else { 4868185029Spjd /* 4869185029Spjd * Inject errors on an l2cache device. 4870185029Spjd */ 4871185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 4872168404Spjd 4873185029Spjd if (sav->sav_count == 0) { 4874185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4875254074Sdelphij (void) rw_unlock(&ztest_name_lock); 4876185029Spjd return; 4877185029Spjd } 4878185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 4879168404Spjd guid0 = vd0->vdev_guid; 4880185029Spjd (void) strcpy(path0, vd0->vdev_path); 4881185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 4882185029Spjd 4883185029Spjd leaf = 0; 4884185029Spjd leaves = 1; 4885185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 4886168404Spjd } 4887168404Spjd 4888185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4889254074Sdelphij (void) rw_unlock(&ztest_name_lock); 4890185029Spjd 4891168404Spjd /* 4892219089Spjd * If we can tolerate two or more faults, or we're dealing 4893219089Spjd * with a slog, randomly online/offline vd0. 4894168404Spjd */ 4895219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 4896209962Smm if (ztest_random(10) < 6) { 4897209962Smm int flags = (ztest_random(2) == 0 ? 4898209962Smm ZFS_OFFLINE_TEMPORARY : 0); 4899219089Spjd 4900219089Spjd /* 4901219089Spjd * We have to grab the zs_name_lock as writer to 4902219089Spjd * prevent a race between offlining a slog and 4903219089Spjd * destroying a dataset. 
Offlining the slog will 4904219089Spjd * grab a reference on the dataset which may cause 4905219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 4906219089Spjd * leaving the dataset in an inconsistent state. 4907219089Spjd */ 4908219089Spjd if (islog) 4909236143Smm (void) rw_wrlock(&ztest_name_lock); 4910219089Spjd 4911209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 4912219089Spjd 4913219089Spjd if (islog) 4914236143Smm (void) rw_unlock(&ztest_name_lock); 4915209962Smm } else { 4916242845Sdelphij /* 4917242845Sdelphij * Ideally we would like to be able to randomly 4918242845Sdelphij * call vdev_[on|off]line without holding locks 4919242845Sdelphij * to force unpredictable failures but the side 4920242845Sdelphij * effects of vdev_[on|off]line prevent us from 4921242845Sdelphij * doing so. We grab the ztest_vdev_lock here to 4922242845Sdelphij * prevent a race between injection testing and 4923242845Sdelphij * aux_vdev removal. 4924242845Sdelphij */ 4925242845Sdelphij VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4926209962Smm (void) vdev_online(spa, guid0, 0, NULL); 4927242845Sdelphij VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4928209962Smm } 4929168404Spjd } 4930168404Spjd 4931219089Spjd if (maxfaults == 0) 4932219089Spjd return; 4933219089Spjd 4934168404Spjd /* 4935168404Spjd * We have at least single-fault tolerance, so inject data corruption. 
4936168404Spjd */ 4937168404Spjd fd = open(pathrand, O_RDWR); 4938168404Spjd 4939168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 4940168404Spjd return; 4941168404Spjd 4942168404Spjd fsize = lseek(fd, 0, SEEK_END); 4943168404Spjd 4944168404Spjd while (--iters != 0) { 4945168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 4946168404Spjd (leaves << bshift) + (leaf << bshift) + 4947168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 4948168404Spjd 4949168404Spjd if (offset >= fsize) 4950168404Spjd continue; 4951168404Spjd 4952236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4953219089Spjd if (mirror_save != zs->zs_mirrors) { 4954236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4955219089Spjd (void) close(fd); 4956219089Spjd return; 4957219089Spjd } 4958168404Spjd 4959168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 4960168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 4961168404Spjd offset, pathrand); 4962219089Spjd 4963236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4964219089Spjd 4965236143Smm if (ztest_opts.zo_verbose >= 7) 4966219089Spjd (void) printf("injected bad word into %s," 4967219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 4968168404Spjd } 4969168404Spjd 4970168404Spjd (void) close(fd); 4971168404Spjd} 4972168404Spjd 4973168404Spjd/* 4974219089Spjd * Verify that DDT repair works as expected. 
4975219089Spjd */ 4976219089Spjdvoid 4977219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 4978219089Spjd{ 4979219089Spjd ztest_shared_t *zs = ztest_shared; 4980236143Smm spa_t *spa = ztest_spa; 4981219089Spjd objset_t *os = zd->zd_os; 4982219089Spjd ztest_od_t od[1]; 4983219089Spjd uint64_t object, blocksize, txg, pattern, psize; 4984219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 4985219089Spjd dmu_buf_t *db; 4986219089Spjd dmu_tx_t *tx; 4987219089Spjd void *buf; 4988219089Spjd blkptr_t blk; 4989219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 4990219089Spjd 4991219089Spjd blocksize = ztest_random_blocksize(); 4992219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 4993219089Spjd 4994219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4995219089Spjd 4996219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4997219089Spjd return; 4998219089Spjd 4999219089Spjd /* 5000219089Spjd * Take the name lock as writer to prevent anyone else from changing 5001219089Spjd * the pool and dataset properies we need to maintain during this test. 
5002219089Spjd */ 5003236143Smm (void) rw_wrlock(&ztest_name_lock); 5004219089Spjd 5005219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 5006219089Spjd B_FALSE) != 0 || 5007219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 5008219089Spjd B_FALSE) != 0) { 5009236143Smm (void) rw_unlock(&ztest_name_lock); 5010219089Spjd return; 5011219089Spjd } 5012219089Spjd 5013219089Spjd object = od[0].od_object; 5014219089Spjd blocksize = od[0].od_blocksize; 5015228103Smm pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); 5016219089Spjd 5017219089Spjd ASSERT(object != 0); 5018219089Spjd 5019219089Spjd tx = dmu_tx_create(os); 5020219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 5021219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 5022219089Spjd if (txg == 0) { 5023236143Smm (void) rw_unlock(&ztest_name_lock); 5024219089Spjd return; 5025219089Spjd } 5026219089Spjd 5027219089Spjd /* 5028219089Spjd * Write all the copies of our block. 5029219089Spjd */ 5030219089Spjd for (int i = 0; i < copies; i++) { 5031219089Spjd uint64_t offset = i * blocksize; 5032248571Smm int error = dmu_buf_hold(os, object, offset, FTAG, &db, 5033248571Smm DMU_READ_NO_PREFETCH); 5034248571Smm if (error != 0) { 5035248571Smm fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", 5036248571Smm os, (long long)object, (long long) offset, error); 5037248571Smm } 5038219089Spjd ASSERT(db->db_offset == offset); 5039219089Spjd ASSERT(db->db_size == blocksize); 5040219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 5041219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 5042219089Spjd dmu_buf_will_fill(db, tx); 5043219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 5044219089Spjd dmu_buf_rele(db, FTAG); 5045219089Spjd } 5046219089Spjd 5047219089Spjd dmu_tx_commit(tx); 5048219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 5049219089Spjd 5050219089Spjd /* 5051219089Spjd * Find out what block we got. 
5052219089Spjd */ 5053243524Smm VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, 5054243524Smm DMU_READ_NO_PREFETCH)); 5055219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 5056219089Spjd dmu_buf_rele(db, FTAG); 5057219089Spjd 5058219089Spjd /* 5059219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 5060219089Spjd */ 5061219089Spjd psize = BP_GET_PSIZE(&blk); 5062219089Spjd buf = zio_buf_alloc(psize); 5063219089Spjd ztest_pattern_set(buf, psize, ~pattern); 5064219089Spjd 5065219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 5066219089Spjd buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 5067219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 5068219089Spjd 5069219089Spjd zio_buf_free(buf, psize); 5070219089Spjd 5071236143Smm (void) rw_unlock(&ztest_name_lock); 5072219089Spjd} 5073219089Spjd 5074219089Spjd/* 5075168404Spjd * Scrub the pool. 5076168404Spjd */ 5077219089Spjd/* ARGSUSED */ 5078168404Spjdvoid 5079219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 5080168404Spjd{ 5081236143Smm spa_t *spa = ztest_spa; 5082168404Spjd 5083219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5084219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 5085219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5086168404Spjd} 5087168404Spjd 5088168404Spjd/* 5089228103Smm * Change the guid for the pool. 
5090228103Smm */ 5091228103Smm/* ARGSUSED */ 5092228103Smmvoid 5093228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 5094228103Smm{ 5095236143Smm spa_t *spa = ztest_spa; 5096228103Smm uint64_t orig, load; 5097239620Smm int error; 5098228103Smm 5099228103Smm orig = spa_guid(spa); 5100228103Smm load = spa_load_guid(spa); 5101239620Smm 5102239620Smm (void) rw_wrlock(&ztest_name_lock); 5103239620Smm error = spa_change_guid(spa); 5104239620Smm (void) rw_unlock(&ztest_name_lock); 5105239620Smm 5106239620Smm if (error != 0) 5107228103Smm return; 5108228103Smm 5109243505Smm if (ztest_opts.zo_verbose >= 4) { 5110228103Smm (void) printf("Changed guid old %llu -> %llu\n", 5111228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 5112228103Smm } 5113228103Smm 5114228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 5115228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 5116228103Smm} 5117228103Smm 5118228103Smm/* 5119168404Spjd * Rename the pool to a different name and then rename it back. 5120168404Spjd */ 5121219089Spjd/* ARGSUSED */ 5122168404Spjdvoid 5123219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 5124168404Spjd{ 5125168404Spjd char *oldname, *newname; 5126168404Spjd spa_t *spa; 5127168404Spjd 5128236143Smm (void) rw_wrlock(&ztest_name_lock); 5129168404Spjd 5130236143Smm oldname = ztest_opts.zo_pool; 5131168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 5132168404Spjd (void) strcpy(newname, oldname); 5133168404Spjd (void) strcat(newname, "_tmp"); 5134168404Spjd 5135168404Spjd /* 5136168404Spjd * Do the rename 5137168404Spjd */ 5138219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 5139168404Spjd 5140168404Spjd /* 5141168404Spjd * Try to open it under the old name, which shouldn't exist 5142168404Spjd */ 5143219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5144168404Spjd 5145168404Spjd /* 5146168404Spjd * Open it under the new name and make sure it's still the same spa_t. 
5147168404Spjd */ 5148219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5149168404Spjd 5150236143Smm ASSERT(spa == ztest_spa); 5151168404Spjd spa_close(spa, FTAG); 5152168404Spjd 5153168404Spjd /* 5154168404Spjd * Rename it back to the original 5155168404Spjd */ 5156219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 5157168404Spjd 5158168404Spjd /* 5159168404Spjd * Make sure it can still be opened 5160168404Spjd */ 5161219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5162168404Spjd 5163236143Smm ASSERT(spa == ztest_spa); 5164168404Spjd spa_close(spa, FTAG); 5165168404Spjd 5166168404Spjd umem_free(newname, strlen(newname) + 1); 5167168404Spjd 5168236143Smm (void) rw_unlock(&ztest_name_lock); 5169168404Spjd} 5170168404Spjd 5171168404Spjd/* 5172219089Spjd * Verify pool integrity by running zdb. 5173168404Spjd */ 5174168404Spjdstatic void 5175219089Spjdztest_run_zdb(char *pool) 5176168404Spjd{ 5177168404Spjd int status; 5178168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 5179168404Spjd char zbuf[1024]; 5180168404Spjd char *bin; 5181185029Spjd char *ztest; 5182185029Spjd char *isa; 5183185029Spjd int isalen; 5184168404Spjd FILE *fp; 5185168404Spjd 5186214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 5187168404Spjd 5188168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 5189168404Spjd bin = strstr(zdb, "/usr/bin/"); 5190185029Spjd ztest = strstr(bin, "/ztest"); 5191185029Spjd isa = bin + 8; 5192185029Spjd isalen = ztest - isa; 5193185029Spjd isa = strdup(isa); 5194168404Spjd /* LINTED */ 5195185029Spjd (void) sprintf(bin, 5196268075Sdelphij "/usr/sbin%.*s/zdb -bcc%s%s -d -U %s %s", 5197185029Spjd isalen, 5198185029Spjd isa, 5199236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 5200236143Smm ztest_opts.zo_verbose >= 4 ? 
"v" : "", 5201219089Spjd spa_config_path, 5202208047Smm pool); 5203185029Spjd free(isa); 5204168404Spjd 5205236143Smm if (ztest_opts.zo_verbose >= 5) 5206168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 5207168404Spjd 5208168404Spjd fp = popen(zdb, "r"); 5209168404Spjd assert(fp != NULL); 5210168404Spjd 5211168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 5212236143Smm if (ztest_opts.zo_verbose >= 3) 5213168404Spjd (void) printf("%s", zbuf); 5214168404Spjd 5215168404Spjd status = pclose(fp); 5216168404Spjd 5217168404Spjd if (status == 0) 5218168404Spjd return; 5219168404Spjd 5220168404Spjd ztest_dump_core = 0; 5221168404Spjd if (WIFEXITED(status)) 5222168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 5223168404Spjd else 5224168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 5225168404Spjd} 5226168404Spjd 5227168404Spjdstatic void 5228168404Spjdztest_walk_pool_directory(char *header) 5229168404Spjd{ 5230168404Spjd spa_t *spa = NULL; 5231168404Spjd 5232236143Smm if (ztest_opts.zo_verbose >= 6) 5233168404Spjd (void) printf("%s\n", header); 5234168404Spjd 5235168404Spjd mutex_enter(&spa_namespace_lock); 5236168404Spjd while ((spa = spa_next(spa)) != NULL) 5237236143Smm if (ztest_opts.zo_verbose >= 6) 5238168404Spjd (void) printf("\t%s\n", spa_name(spa)); 5239168404Spjd mutex_exit(&spa_namespace_lock); 5240168404Spjd} 5241168404Spjd 5242168404Spjdstatic void 5243168404Spjdztest_spa_import_export(char *oldname, char *newname) 5244168404Spjd{ 5245209962Smm nvlist_t *config, *newconfig; 5246168404Spjd uint64_t pool_guid; 5247168404Spjd spa_t *spa; 5248248571Smm int error; 5249168404Spjd 5250236143Smm if (ztest_opts.zo_verbose >= 4) { 5251168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5252168404Spjd oldname, newname); 5253168404Spjd } 5254168404Spjd 5255168404Spjd /* 5256168404Spjd * Clean up from previous runs. 
5257168404Spjd */ 5258168404Spjd (void) spa_destroy(newname); 5259168404Spjd 5260168404Spjd /* 5261168404Spjd * Get the pool's configuration and guid. 5262168404Spjd */ 5263219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5264168404Spjd 5265209962Smm /* 5266209962Smm * Kick off a scrub to tickle scrub/export races. 5267209962Smm */ 5268209962Smm if (ztest_random(2) == 0) 5269219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5270209962Smm 5271168404Spjd pool_guid = spa_guid(spa); 5272168404Spjd spa_close(spa, FTAG); 5273168404Spjd 5274168404Spjd ztest_walk_pool_directory("pools before export"); 5275168404Spjd 5276168404Spjd /* 5277168404Spjd * Export it. 5278168404Spjd */ 5279219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5280168404Spjd 5281168404Spjd ztest_walk_pool_directory("pools after export"); 5282168404Spjd 5283168404Spjd /* 5284209962Smm * Try to import it. 5285209962Smm */ 5286209962Smm newconfig = spa_tryimport(config); 5287209962Smm ASSERT(newconfig != NULL); 5288209962Smm nvlist_free(newconfig); 5289209962Smm 5290209962Smm /* 5291168404Spjd * Import it under the new name. 5292168404Spjd */ 5293248571Smm error = spa_import(newname, config, NULL, 0); 5294248571Smm if (error != 0) { 5295248571Smm dump_nvlist(config, 0); 5296248571Smm fatal(B_FALSE, "couldn't import pool %s as %s: error %u", 5297248571Smm oldname, newname, error); 5298248571Smm } 5299168404Spjd 5300168404Spjd ztest_walk_pool_directory("pools after import"); 5301168404Spjd 5302168404Spjd /* 5303168404Spjd * Try to import it again -- should fail with EEXIST. 5304168404Spjd */ 5305219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5306168404Spjd 5307168404Spjd /* 5308168404Spjd * Try to import it under a different name -- should fail with EEXIST. 
5309168404Spjd */ 5310219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5311168404Spjd 5312168404Spjd /* 5313168404Spjd * Verify that the pool is no longer visible under the old name. 5314168404Spjd */ 5315219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5316168404Spjd 5317168404Spjd /* 5318168404Spjd * Verify that we can open and close the pool using the new name. 5319168404Spjd */ 5320219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5321168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5322168404Spjd spa_close(spa, FTAG); 5323168404Spjd 5324168404Spjd nvlist_free(config); 5325168404Spjd} 5326168404Spjd 5327209962Smmstatic void 5328209962Smmztest_resume(spa_t *spa) 5329209962Smm{ 5330236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5331219089Spjd (void) printf("resuming from suspended state\n"); 5332219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5333219089Spjd vdev_clear(spa, NULL); 5334219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5335219089Spjd (void) zio_resume(spa); 5336209962Smm} 5337209962Smm 5338168404Spjdstatic void * 5339209962Smmztest_resume_thread(void *arg) 5340185029Spjd{ 5341185029Spjd spa_t *spa = arg; 5342185029Spjd 5343185029Spjd while (!ztest_exiting) { 5344219089Spjd if (spa_suspended(spa)) 5345219089Spjd ztest_resume(spa); 5346219089Spjd (void) poll(NULL, 0, 100); 5347185029Spjd } 5348185029Spjd return (NULL); 5349185029Spjd} 5350185029Spjd 5351185029Spjdstatic void * 5352219089Spjdztest_deadman_thread(void *arg) 5353219089Spjd{ 5354219089Spjd ztest_shared_t *zs = arg; 5355254074Sdelphij spa_t *spa = ztest_spa; 5356254074Sdelphij hrtime_t delta, total = 0; 5357219089Spjd 5358254074Sdelphij for (;;) { 5359258632Savg delta = zs->zs_thread_stop - zs->zs_thread_start + 5360258632Savg MSEC2NSEC(zfs_deadman_synctime_ms); 5361219089Spjd 5362258632Savg (void) poll(NULL, 0, (int)NSEC2MSEC(delta)); 5363219089Spjd 5364254074Sdelphij /* 5365254074Sdelphij * If the pool is suspended then fail 
immediately. Otherwise, 5366254074Sdelphij * check to see if the pool is making any progress. If 5367254074Sdelphij * vdev_deadman() discovers that there hasn't been any recent 5368254074Sdelphij * I/Os then it will end up aborting the tests. 5369254074Sdelphij */ 5370258717Savg if (spa_suspended(spa) || spa->spa_root_vdev == NULL) { 5371254074Sdelphij fatal(0, "aborting test after %llu seconds because " 5372254074Sdelphij "pool has transitioned to a suspended state.", 5373258632Savg zfs_deadman_synctime_ms / 1000); 5374254074Sdelphij return (NULL); 5375254074Sdelphij } 5376254074Sdelphij vdev_deadman(spa->spa_root_vdev); 5377219089Spjd 5378258632Savg total += zfs_deadman_synctime_ms/1000; 5379254074Sdelphij (void) printf("ztest has been running for %lld seconds\n", 5380254074Sdelphij total); 5381254074Sdelphij } 5382219089Spjd} 5383219089Spjd 5384219089Spjdstatic void 5385236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5386219089Spjd{ 5387236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5388236143Smm ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5389219089Spjd hrtime_t functime = gethrtime(); 5390219089Spjd 5391219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5392219089Spjd zi->zi_func(zd, id); 5393219089Spjd 5394219089Spjd functime = gethrtime() - functime; 5395219089Spjd 5396236143Smm atomic_add_64(&zc->zc_count, 1); 5397236143Smm atomic_add_64(&zc->zc_time, functime); 5398219089Spjd 5399236143Smm if (ztest_opts.zo_verbose >= 4) { 5400219089Spjd Dl_info dli; 5401219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5402219089Spjd (void) printf("%6.2f sec in %s\n", 5403219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5404219089Spjd } 5405219089Spjd} 5406219089Spjd 5407219089Spjdstatic void * 5408168404Spjdztest_thread(void *arg) 5409168404Spjd{ 5410236143Smm int rand; 5411219089Spjd uint64_t id = (uintptr_t)arg; 5412168404Spjd ztest_shared_t *zs = ztest_shared; 5413219089Spjd uint64_t call_next; 
5414219089Spjd hrtime_t now; 5415168404Spjd ztest_info_t *zi; 5416236143Smm ztest_shared_callstate_t *zc; 5417168404Spjd 5418219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5419168404Spjd /* 5420168404Spjd * See if it's time to force a crash. 5421168404Spjd */ 5422219089Spjd if (now > zs->zs_thread_kill) 5423219089Spjd ztest_kill(zs); 5424168404Spjd 5425168404Spjd /* 5426219089Spjd * If we're getting ENOSPC with some regularity, stop. 5427168404Spjd */ 5428219089Spjd if (zs->zs_enospc_count > 10) 5429219089Spjd break; 5430168404Spjd 5431168404Spjd /* 5432219089Spjd * Pick a random function to execute. 5433168404Spjd */ 5434236143Smm rand = ztest_random(ZTEST_FUNCS); 5435236143Smm zi = &ztest_info[rand]; 5436236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5437236143Smm call_next = zc->zc_next; 5438168404Spjd 5439219089Spjd if (now >= call_next && 5440236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5441236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5442236143Smm ztest_execute(rand, zi, id); 5443236143Smm } 5444219089Spjd } 5445168404Spjd 5446219089Spjd return (NULL); 5447219089Spjd} 5448168404Spjd 5449219089Spjdstatic void 5450219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5451219089Spjd{ 5452219089Spjd (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); 5453219089Spjd} 5454168404Spjd 5455219089Spjdstatic void 5456236143Smmztest_dataset_destroy(int d) 5457219089Spjd{ 5458219089Spjd char name[MAXNAMELEN]; 5459168404Spjd 5460236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5461168404Spjd 5462236143Smm if (ztest_opts.zo_verbose >= 3) 5463219089Spjd (void) printf("Destroying %s to free up space\n", name); 5464168404Spjd 5465219089Spjd /* 5466219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5467219089Spjd * ztest thread t operates on dataset (t % zopt_datasets), 5468219089Spjd * so there may be more than one thing to clean up. 
5469219089Spjd */ 5470236143Smm for (int t = d; t < ztest_opts.zo_threads; 5471236143Smm t += ztest_opts.zo_datasets) { 5472219089Spjd ztest_dsl_dataset_cleanup(name, t); 5473236143Smm } 5474219089Spjd 5475219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5476219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5477219089Spjd} 5478219089Spjd 5479219089Spjdstatic void 5480219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5481219089Spjd{ 5482219089Spjd uint64_t usedobjs, dirobjs, scratch; 5483219089Spjd 5484219089Spjd /* 5485219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset. 5486219089Spjd * Therefore, the number of objects in use should equal the 5487219089Spjd * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. 5488219089Spjd * If not, we have an object leak. 5489219089Spjd * 5490219089Spjd * Note that we can only check this in ztest_dataset_open(), 5491219089Spjd * when the open-context and syncing-context values agree. 5492219089Spjd * That's because zap_count() returns the open-context value, 5493219089Spjd * while dmu_objset_space() returns the rootbp fill count. 
5494219089Spjd */ 5495219089Spjd VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); 5496219089Spjd dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); 5497219089Spjd ASSERT3U(dirobjs + 1, ==, usedobjs); 5498219089Spjd} 5499219089Spjd 5500219089Spjdstatic int 5501236143Smmztest_dataset_open(int d) 5502219089Spjd{ 5503236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5504236143Smm uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; 5505219089Spjd objset_t *os; 5506219089Spjd zilog_t *zilog; 5507219089Spjd char name[MAXNAMELEN]; 5508219089Spjd int error; 5509219089Spjd 5510236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5511219089Spjd 5512236143Smm (void) rw_rdlock(&ztest_name_lock); 5513219089Spjd 5514219089Spjd error = ztest_dataset_create(name); 5515219089Spjd if (error == ENOSPC) { 5516236143Smm (void) rw_unlock(&ztest_name_lock); 5517219089Spjd ztest_record_enospc(FTAG); 5518219089Spjd return (error); 5519168404Spjd } 5520219089Spjd ASSERT(error == 0 || error == EEXIST); 5521168404Spjd 5522248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); 5523236143Smm (void) rw_unlock(&ztest_name_lock); 5524219089Spjd 5525236143Smm ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); 5526219089Spjd 5527219089Spjd zilog = zd->zd_zilog; 5528219089Spjd 5529219089Spjd if (zilog->zl_header->zh_claim_lr_seq != 0 && 5530219089Spjd zilog->zl_header->zh_claim_lr_seq < committed_seq) 5531219089Spjd fatal(0, "missing log records: claimed %llu < committed %llu", 5532219089Spjd zilog->zl_header->zh_claim_lr_seq, committed_seq); 5533219089Spjd 5534219089Spjd ztest_dataset_dirobj_verify(zd); 5535219089Spjd 5536219089Spjd zil_replay(os, zd, ztest_replay_vector); 5537219089Spjd 5538219089Spjd ztest_dataset_dirobj_verify(zd); 5539219089Spjd 5540236143Smm if (ztest_opts.zo_verbose >= 6) 5541219089Spjd (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", 5542219089Spjd zd->zd_name, 5543219089Spjd 
(u_longlong_t)zilog->zl_parse_blk_count, 5544219089Spjd (u_longlong_t)zilog->zl_parse_lr_count, 5545219089Spjd (u_longlong_t)zilog->zl_replaying_seq); 5546219089Spjd 5547219089Spjd zilog = zil_open(os, ztest_get_data); 5548219089Spjd 5549219089Spjd if (zilog->zl_replaying_seq != 0 && 5550219089Spjd zilog->zl_replaying_seq < committed_seq) 5551219089Spjd fatal(0, "missing log records: replayed %llu < committed %llu", 5552219089Spjd zilog->zl_replaying_seq, committed_seq); 5553219089Spjd 5554219089Spjd return (0); 5555168404Spjd} 5556168404Spjd 5557219089Spjdstatic void 5558236143Smmztest_dataset_close(int d) 5559219089Spjd{ 5560236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5561219089Spjd 5562219089Spjd zil_close(zd->zd_zilog); 5563248571Smm dmu_objset_disown(zd->zd_os, zd); 5564219089Spjd 5565219089Spjd ztest_zd_fini(zd); 5566219089Spjd} 5567219089Spjd 5568168404Spjd/* 5569168404Spjd * Kick off threads to run tests on all datasets in parallel. 5570168404Spjd */ 5571168404Spjdstatic void 5572219089Spjdztest_run(ztest_shared_t *zs) 5573168404Spjd{ 5574219089Spjd thread_t *tid; 5575168404Spjd spa_t *spa; 5576228103Smm objset_t *os; 5577185029Spjd thread_t resume_tid; 5578219089Spjd int error; 5579168404Spjd 5580185029Spjd ztest_exiting = B_FALSE; 5581185029Spjd 5582168404Spjd /* 5583219089Spjd * Initialize parent/child shared state. 
5584168404Spjd */ 5585236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5586236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5587168404Spjd 5588219089Spjd zs->zs_thread_start = gethrtime(); 5589236143Smm zs->zs_thread_stop = 5590236143Smm zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; 5591219089Spjd zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); 5592219089Spjd zs->zs_thread_kill = zs->zs_thread_stop; 5593236143Smm if (ztest_random(100) < ztest_opts.zo_killrate) { 5594236143Smm zs->zs_thread_kill -= 5595236143Smm ztest_random(ztest_opts.zo_passtime * NANOSEC); 5596236143Smm } 5597168404Spjd 5598219089Spjd (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); 5599168404Spjd 5600219089Spjd list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), 5601219089Spjd offsetof(ztest_cb_data_t, zcd_node)); 5602168404Spjd 5603168404Spjd /* 5604219089Spjd * Open our pool. 5605168404Spjd */ 5606219089Spjd kernel_init(FREAD | FWRITE); 5607248571Smm VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5608224177Smm spa->spa_debug = B_TRUE; 5609268086Sdelphij metaslab_preload_limit = ztest_random(20) + 1; 5610236143Smm ztest_spa = spa; 5611168404Spjd 5612248571Smm VERIFY0(dmu_objset_own(ztest_opts.zo_pool, 5613248571Smm DMU_OST_ANY, B_TRUE, FTAG, &os)); 5614228103Smm zs->zs_guid = dmu_objset_fsid_guid(os); 5615248571Smm dmu_objset_disown(os, FTAG); 5616228103Smm 5617219089Spjd spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; 5618168404Spjd 5619168404Spjd /* 5620209962Smm * We don't expect the pool to suspend unless maxfaults == 0, 5621209962Smm * in which case ztest_fault_inject() temporarily takes away 5622209962Smm * the only valid replica. 
5623209962Smm */ 5624219089Spjd if (MAXFAULTS() == 0) 5625209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; 5626209962Smm else 5627209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 5628209962Smm 5629209962Smm /* 5630185029Spjd * Create a thread to periodically resume suspended I/O. 5631185029Spjd */ 5632209962Smm VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, 5633185029Spjd &resume_tid) == 0); 5634185029Spjd 5635185029Spjd /* 5636219089Spjd * Create a deadman thread to abort() if we hang. 5637219089Spjd */ 5638219089Spjd VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, 5639219089Spjd NULL) == 0); 5640219089Spjd 5641219089Spjd /* 5642168404Spjd * Verify that we can safely inquire about about any object, 5643168404Spjd * whether it's allocated or not. To make it interesting, 5644168404Spjd * we probe a 5-wide window around each power of two. 5645168404Spjd * This hits all edge cases, including zero and the max. 5646168404Spjd */ 5647219089Spjd for (int t = 0; t < 64; t++) { 5648219089Spjd for (int d = -5; d <= 5; d++) { 5649168404Spjd error = dmu_object_info(spa->spa_meta_objset, 5650168404Spjd (1ULL << t) + d, NULL); 5651168404Spjd ASSERT(error == 0 || error == ENOENT || 5652168404Spjd error == EINVAL); 5653168404Spjd } 5654168404Spjd } 5655168404Spjd 5656168404Spjd /* 5657219089Spjd * If we got any ENOSPC errors on the previous run, destroy something. 5658168404Spjd */ 5659219089Spjd if (zs->zs_enospc_count != 0) { 5660236143Smm int d = ztest_random(ztest_opts.zo_datasets); 5661236143Smm ztest_dataset_destroy(d); 5662219089Spjd } 5663168404Spjd zs->zs_enospc_count = 0; 5664168404Spjd 5665236143Smm tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), 5666236143Smm UMEM_NOFAIL); 5667168404Spjd 5668236143Smm if (ztest_opts.zo_verbose >= 4) 5669168404Spjd (void) printf("starting main threads...\n"); 5670168404Spjd 5671219089Spjd /* 5672219089Spjd * Kick off all the tests that run in parallel. 
5673219089Spjd */ 5674236143Smm for (int t = 0; t < ztest_opts.zo_threads; t++) { 5675236143Smm if (t < ztest_opts.zo_datasets && 5676236143Smm ztest_dataset_open(t) != 0) 5677219089Spjd return; 5678219089Spjd VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, 5679219089Spjd THR_BOUND, &tid[t]) == 0); 5680219089Spjd } 5681168404Spjd 5682219089Spjd /* 5683219089Spjd * Wait for all of the tests to complete. We go in reverse order 5684219089Spjd * so we don't close datasets while threads are still using them. 5685219089Spjd */ 5686236143Smm for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { 5687219089Spjd VERIFY(thr_join(tid[t], NULL, NULL) == 0); 5688236143Smm if (t < ztest_opts.zo_datasets) 5689236143Smm ztest_dataset_close(t); 5690219089Spjd } 5691185029Spjd 5692219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5693185029Spjd 5694219089Spjd zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 5695219089Spjd zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); 5696254112Sdelphij zfs_dbgmsg_print(FTAG); 5697168404Spjd 5698236143Smm umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); 5699168404Spjd 5700219089Spjd /* Kill the resume thread */ 5701219089Spjd ztest_exiting = B_TRUE; 5702219089Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 5703219089Spjd ztest_resume(spa); 5704219089Spjd 5705219089Spjd /* 5706219089Spjd * Right before closing the pool, kick off a bunch of async I/O; 5707219089Spjd * spa_close() should wait for it to complete. 5708219089Spjd */ 5709286705Smav for (uint64_t object = 1; object < 50; object++) { 5710286705Smav dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20, 5711286705Smav ZIO_PRIORITY_SYNC_READ); 5712286705Smav } 5713219089Spjd 5714219089Spjd spa_close(spa, FTAG); 5715219089Spjd 5716219089Spjd /* 5717219089Spjd * Verify that we can loop over all pools. 
5718219089Spjd */ 5719219089Spjd mutex_enter(&spa_namespace_lock); 5720219089Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) 5721236143Smm if (ztest_opts.zo_verbose > 3) 5722219089Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 5723219089Spjd mutex_exit(&spa_namespace_lock); 5724219089Spjd 5725219089Spjd /* 5726219089Spjd * Verify that we can export the pool and reimport it under a 5727219089Spjd * different name. 5728219089Spjd */ 5729219089Spjd if (ztest_random(2) == 0) { 5730219089Spjd char name[MAXNAMELEN]; 5731236143Smm (void) snprintf(name, MAXNAMELEN, "%s_import", 5732236143Smm ztest_opts.zo_pool); 5733236143Smm ztest_spa_import_export(ztest_opts.zo_pool, name); 5734236143Smm ztest_spa_import_export(name, ztest_opts.zo_pool); 5735168404Spjd } 5736168404Spjd 5737219089Spjd kernel_fini(); 5738219089Spjd 5739219089Spjd list_destroy(&zcl.zcl_callbacks); 5740219089Spjd 5741219089Spjd (void) _mutex_destroy(&zcl.zcl_callbacks_lock); 5742219089Spjd 5743236143Smm (void) rwlock_destroy(&ztest_name_lock); 5744236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5745219089Spjd} 5746219089Spjd 5747219089Spjdstatic void 5748236143Smmztest_freeze(void) 5749219089Spjd{ 5750236143Smm ztest_ds_t *zd = &ztest_ds[0]; 5751219089Spjd spa_t *spa; 5752219089Spjd int numloops = 0; 5753219089Spjd 5754236143Smm if (ztest_opts.zo_verbose >= 3) 5755219089Spjd (void) printf("testing spa_freeze()...\n"); 5756168404Spjd 5757219089Spjd kernel_init(FREAD | FWRITE); 5758236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5759236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5760243524Smm spa->spa_debug = B_TRUE; 5761243524Smm ztest_spa = spa; 5762168404Spjd 5763168404Spjd /* 5764219089Spjd * Force the first log block to be transactionally allocated. 5765219089Spjd * We have to do this before we freeze the pool -- otherwise 5766219089Spjd * the log chain won't be anchored. 
5767168404Spjd */ 5768219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 5769219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5770219089Spjd zil_commit(zd->zd_zilog, 0); 5771168404Spjd } 5772168404Spjd 5773168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5774168404Spjd 5775219089Spjd /* 5776219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 5777219089Spjd * so that the only way to record changes from now on is the ZIL. 5778219089Spjd */ 5779219089Spjd spa_freeze(spa); 5780185029Spjd 5781219089Spjd /* 5782268855Sdelphij * Because it is hard to predict how much space a write will actually 5783268855Sdelphij * require beforehand, we leave ourselves some fudge space to write over 5784268855Sdelphij * capacity. 5785268855Sdelphij */ 5786268855Sdelphij uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2; 5787268855Sdelphij 5788268855Sdelphij /* 5789219089Spjd * Run tests that generate log records but don't alter the pool config 5790219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 5791219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 5792219089Spjd * to increase well beyond the last synced value in the uberblock. 5793219089Spjd * The ZIL should be OK with that. 5794268855Sdelphij * 5795268855Sdelphij * Run a random number of times less than zo_maxloops and ensure we do 5796268855Sdelphij * not run out of space on the pool. 
5797219089Spjd */ 5798236143Smm while (ztest_random(10) != 0 && 5799268855Sdelphij numloops++ < ztest_opts.zo_maxloops && 5800268855Sdelphij metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) { 5801268855Sdelphij ztest_od_t od; 5802268855Sdelphij ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 5803268855Sdelphij VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE)); 5804268855Sdelphij ztest_io(zd, od.od_object, 5805268855Sdelphij ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 5806219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5807219089Spjd } 5808185029Spjd 5809168404Spjd /* 5810219089Spjd * Commit all of the changes we just generated. 5811168404Spjd */ 5812219089Spjd zil_commit(zd->zd_zilog, 0); 5813219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5814168404Spjd 5815219089Spjd /* 5816219089Spjd * Close our dataset and close the pool. 5817219089Spjd */ 5818236143Smm ztest_dataset_close(0); 5819168404Spjd spa_close(spa, FTAG); 5820219089Spjd kernel_fini(); 5821168404Spjd 5822219089Spjd /* 5823219089Spjd * Open and close the pool and dataset to induce log replay. 
5824219089Spjd */ 5825219089Spjd kernel_init(FREAD | FWRITE); 5826236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5827239620Smm ASSERT(spa_freeze_txg(spa) == UINT64_MAX); 5828236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5829236143Smm ztest_dataset_close(0); 5830239620Smm 5831239620Smm spa->spa_debug = B_TRUE; 5832239620Smm ztest_spa = spa; 5833239620Smm txg_wait_synced(spa_get_dsl(spa), 0); 5834239620Smm ztest_reguid(NULL, 0); 5835239620Smm 5836219089Spjd spa_close(spa, FTAG); 5837168404Spjd kernel_fini(); 5838168404Spjd} 5839168404Spjd 5840168404Spjdvoid 5841168404Spjdprint_time(hrtime_t t, char *timebuf) 5842168404Spjd{ 5843168404Spjd hrtime_t s = t / NANOSEC; 5844168404Spjd hrtime_t m = s / 60; 5845168404Spjd hrtime_t h = m / 60; 5846168404Spjd hrtime_t d = h / 24; 5847168404Spjd 5848168404Spjd s -= m * 60; 5849168404Spjd m -= h * 60; 5850168404Spjd h -= d * 24; 5851168404Spjd 5852168404Spjd timebuf[0] = '\0'; 5853168404Spjd 5854168404Spjd if (d) 5855168404Spjd (void) sprintf(timebuf, 5856168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 5857168404Spjd else if (h) 5858168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 5859168404Spjd else if (m) 5860168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 5861168404Spjd else 5862168404Spjd (void) sprintf(timebuf, "%llus", s); 5863168404Spjd} 5864168404Spjd 5865219089Spjdstatic nvlist_t * 5866219089Spjdmake_random_props() 5867219089Spjd{ 5868219089Spjd nvlist_t *props; 5869219089Spjd 5870236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 5871219089Spjd if (ztest_random(2) == 0) 5872236884Smm return (props); 5873219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 5874219089Spjd 5875219089Spjd return (props); 5876219089Spjd} 5877219089Spjd 5878168404Spjd/* 5879168404Spjd * Create a storage pool with the given name and initial vdev size. 5880219089Spjd * Then test spa_freeze() functionality. 
5881168404Spjd */ 5882168404Spjdstatic void 5883219089Spjdztest_init(ztest_shared_t *zs) 5884168404Spjd{ 5885168404Spjd spa_t *spa; 5886219089Spjd nvlist_t *nvroot, *props; 5887168404Spjd 5888236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5889236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5890219089Spjd 5891168404Spjd kernel_init(FREAD | FWRITE); 5892168404Spjd 5893168404Spjd /* 5894168404Spjd * Create the storage pool. 5895168404Spjd */ 5896236143Smm (void) spa_destroy(ztest_opts.zo_pool); 5897219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 5898219089Spjd zs->zs_splits = 0; 5899236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 5900243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 5901236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 5902219089Spjd props = make_random_props(); 5903236884Smm for (int i = 0; i < SPA_FEATURES; i++) { 5904236884Smm char buf[1024]; 5905236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 5906236884Smm spa_feature_table[i].fi_uname); 5907236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 5908236884Smm } 5909248571Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); 5910168404Spjd nvlist_free(nvroot); 5911168404Spjd 5912236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5913236143Smm zs->zs_metaslab_sz = 5914236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 5915236884Smm 5916219089Spjd spa_close(spa, FTAG); 5917209962Smm 5918219089Spjd kernel_fini(); 5919168404Spjd 5920236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5921168404Spjd 5922236143Smm ztest_freeze(); 5923219089Spjd 5924236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5925219089Spjd 5926236143Smm (void) rwlock_destroy(&ztest_name_lock); 5927236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5928168404Spjd} 5929168404Spjd 5930236143Smmstatic void 5931242845Sdelphijsetup_data_fd(void) 5932236143Smm{ 5933242845Sdelphij static char 
ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; 5934236143Smm 5935242845Sdelphij ztest_fd_data = mkstemp(ztest_name_data); 5936242845Sdelphij ASSERT3S(ztest_fd_data, >=, 0); 5937242845Sdelphij (void) unlink(ztest_name_data); 5938242845Sdelphij} 5939236143Smm 5940236143Smm 5941236884Smmstatic int 5942236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 5943236884Smm{ 5944236884Smm int size; 5945236884Smm 5946236884Smm size = hdr->zh_hdr_size; 5947236884Smm size += hdr->zh_opts_size; 5948236884Smm size += hdr->zh_size; 5949236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 5950236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 5951236884Smm 5952236884Smm return (size); 5953236884Smm} 5954236884Smm 5955236143Smmstatic void 5956236143Smmsetup_hdr(void) 5957236143Smm{ 5958236884Smm int size; 5959236143Smm ztest_shared_hdr_t *hdr; 5960236143Smm 5961236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5962242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5963236143Smm ASSERT(hdr != MAP_FAILED); 5964236143Smm 5965242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); 5966236884Smm 5967236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 5968236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 5969236143Smm hdr->zh_size = sizeof (ztest_shared_t); 5970236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 5971236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 5972236143Smm hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 5973236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 5974236143Smm 5975236884Smm size = shared_data_size(hdr); 5976242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); 5977236884Smm 5978236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5979236143Smm} 5980236143Smm 5981236143Smmstatic void 5982236143Smmsetup_data(void) 5983236143Smm{ 5984236143Smm int size, offset; 5985236143Smm ztest_shared_hdr_t *hdr; 5986236143Smm 
uint8_t *buf; 5987236143Smm 5988236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5989242845Sdelphij PROT_READ, MAP_SHARED, ztest_fd_data, 0); 5990236143Smm ASSERT(hdr != MAP_FAILED); 5991236143Smm 5992236884Smm size = shared_data_size(hdr); 5993236143Smm 5994236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5995236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 5996242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5997236143Smm ASSERT(hdr != MAP_FAILED); 5998236143Smm buf = (uint8_t *)hdr; 5999236143Smm 6000236143Smm offset = hdr->zh_hdr_size; 6001236143Smm ztest_shared_opts = (void *)&buf[offset]; 6002236143Smm offset += hdr->zh_opts_size; 6003236143Smm ztest_shared = (void *)&buf[offset]; 6004236143Smm offset += hdr->zh_size; 6005236143Smm ztest_shared_callstate = (void *)&buf[offset]; 6006236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 6007236143Smm ztest_shared_ds = (void *)&buf[offset]; 6008236143Smm} 6009236143Smm 6010236143Smmstatic boolean_t 6011236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 6012236143Smm{ 6013236143Smm pid_t pid; 6014236143Smm int status; 6015242845Sdelphij char *cmdbuf = NULL; 6016236143Smm 6017236143Smm pid = fork(); 6018236143Smm 6019236143Smm if (cmd == NULL) { 6020242845Sdelphij cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 6021242845Sdelphij (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); 6022236143Smm cmd = cmdbuf; 6023236143Smm } 6024236143Smm 6025236143Smm if (pid == -1) 6026236143Smm fatal(1, "fork failed"); 6027236143Smm 6028236143Smm if (pid == 0) { /* child */ 6029236143Smm char *emptyargv[2] = { cmd, NULL }; 6030242845Sdelphij char fd_data_str[12]; 6031236143Smm 6032236143Smm struct rlimit rl = { 1024, 1024 }; 6033236143Smm (void) setrlimit(RLIMIT_NOFILE, &rl); 6034242845Sdelphij 6035242845Sdelphij (void) close(ztest_fd_rand); 6036242845Sdelphij VERIFY3U(11, >=, 
6037242845Sdelphij snprintf(fd_data_str, 12, "%d", ztest_fd_data)); 6038242845Sdelphij VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); 6039242845Sdelphij 6040236143Smm (void) enable_extended_FILE_stdio(-1, -1); 6041236143Smm if (libpath != NULL) 6042236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 6043236143Smm#ifdef illumos 6044236143Smm (void) execv(cmd, emptyargv); 6045236143Smm#else 6046236143Smm (void) execvp(cmd, emptyargv); 6047236143Smm#endif 6048236143Smm ztest_dump_core = B_FALSE; 6049236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 6050236143Smm } 6051236143Smm 6052242845Sdelphij if (cmdbuf != NULL) { 6053242845Sdelphij umem_free(cmdbuf, MAXPATHLEN); 6054242845Sdelphij cmd = NULL; 6055242845Sdelphij } 6056242845Sdelphij 6057236143Smm while (waitpid(pid, &status, 0) != pid) 6058236143Smm continue; 6059236143Smm if (statusp != NULL) 6060236143Smm *statusp = status; 6061236143Smm 6062236143Smm if (WIFEXITED(status)) { 6063236143Smm if (WEXITSTATUS(status) != 0) { 6064236143Smm (void) fprintf(stderr, "child exited with code %d\n", 6065236143Smm WEXITSTATUS(status)); 6066236143Smm exit(2); 6067236143Smm } 6068236143Smm return (B_FALSE); 6069236143Smm } else if (WIFSIGNALED(status)) { 6070236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 6071236143Smm (void) fprintf(stderr, "child died with signal %d\n", 6072236143Smm WTERMSIG(status)); 6073236143Smm exit(3); 6074236143Smm } 6075236143Smm return (B_TRUE); 6076236143Smm } else { 6077236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 6078236143Smm exit(4); 6079236143Smm /* NOTREACHED */ 6080236143Smm } 6081236143Smm} 6082236143Smm 6083236143Smmstatic void 6084236143Smmztest_run_init(void) 6085236143Smm{ 6086236143Smm ztest_shared_t *zs = ztest_shared; 6087236143Smm 6088236143Smm ASSERT(ztest_opts.zo_init != 0); 6089236143Smm 6090236143Smm /* 6091236143Smm * Blow away any existing copy of zpool.cache 6092236143Smm */ 6093236143Smm (void) remove(spa_config_path); 
6094236143Smm 6095236143Smm /* 6096236143Smm * Create and initialize our storage pool. 6097236143Smm */ 6098236143Smm for (int i = 1; i <= ztest_opts.zo_init; i++) { 6099236143Smm bzero(zs, sizeof (ztest_shared_t)); 6100236143Smm if (ztest_opts.zo_verbose >= 3 && 6101236143Smm ztest_opts.zo_init != 1) { 6102236143Smm (void) printf("ztest_init(), pass %d\n", i); 6103236143Smm } 6104236143Smm ztest_init(zs); 6105236143Smm } 6106236143Smm} 6107236143Smm 6108168404Spjdint 6109168404Spjdmain(int argc, char **argv) 6110168404Spjd{ 6111168404Spjd int kills = 0; 6112168404Spjd int iters = 0; 6113236143Smm int older = 0; 6114236143Smm int newer = 0; 6115168404Spjd ztest_shared_t *zs; 6116168404Spjd ztest_info_t *zi; 6117236143Smm ztest_shared_callstate_t *zc; 6118168404Spjd char timebuf[100]; 6119168404Spjd char numbuf[6]; 6120219089Spjd spa_t *spa; 6121242845Sdelphij char *cmd; 6122236143Smm boolean_t hasalt; 6123242845Sdelphij char *fd_data_str = getenv("ZTEST_FD_DATA"); 6124168404Spjd 6125168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 6126168404Spjd 6127240133Smm dprintf_setup(&argc, argv); 6128258632Savg zfs_deadman_synctime_ms = 300000; 6129240133Smm 6130242845Sdelphij ztest_fd_rand = open("/dev/urandom", O_RDONLY); 6131242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 6132242845Sdelphij 6133242845Sdelphij if (!fd_data_str) { 6134236143Smm process_options(argc, argv); 6135168404Spjd 6136242845Sdelphij setup_data_fd(); 6137236143Smm setup_hdr(); 6138236143Smm setup_data(); 6139236143Smm bcopy(&ztest_opts, ztest_shared_opts, 6140236143Smm sizeof (*ztest_shared_opts)); 6141236143Smm } else { 6142242845Sdelphij ztest_fd_data = atoi(fd_data_str); 6143236143Smm setup_data(); 6144236143Smm bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); 6145236143Smm } 6146236143Smm ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); 6147168404Spjd 6148219089Spjd /* Override location of zpool.cache */ 6149242845Sdelphij VERIFY3U(asprintf((char 
**)&spa_config_path, "%s/zpool.cache", 6150242845Sdelphij ztest_opts.zo_dir), !=, -1); 6151219089Spjd 6152236143Smm ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), 6153236143Smm UMEM_NOFAIL); 6154236143Smm zs = ztest_shared; 6155168404Spjd 6156242845Sdelphij if (fd_data_str) { 6157236143Smm metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; 6158236143Smm metaslab_df_alloc_threshold = 6159236143Smm zs->zs_metaslab_df_alloc_threshold; 6160219089Spjd 6161236143Smm if (zs->zs_do_init) 6162236143Smm ztest_run_init(); 6163236143Smm else 6164236143Smm ztest_run(zs); 6165236143Smm exit(0); 6166236143Smm } 6167168404Spjd 6168236143Smm hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); 6169236143Smm 6170236143Smm if (ztest_opts.zo_verbose >= 1) { 6171168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 6172168404Spjd " %llu seconds...\n", 6173236143Smm (u_longlong_t)ztest_opts.zo_vdevs, 6174236143Smm ztest_opts.zo_datasets, 6175236143Smm ztest_opts.zo_threads, 6176236143Smm (u_longlong_t)ztest_opts.zo_time); 6177168404Spjd } 6178168404Spjd 6179242845Sdelphij cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); 6180242845Sdelphij (void) strlcpy(cmd, getexecname(), MAXNAMELEN); 6181236143Smm 6182236143Smm zs->zs_do_init = B_TRUE; 6183236143Smm if (strlen(ztest_opts.zo_alt_ztest) != 0) { 6184236143Smm if (ztest_opts.zo_verbose >= 1) { 6185236143Smm (void) printf("Executing older ztest for " 6186236143Smm "initialization: %s\n", ztest_opts.zo_alt_ztest); 6187236143Smm } 6188236143Smm VERIFY(!exec_child(ztest_opts.zo_alt_ztest, 6189236143Smm ztest_opts.zo_alt_libpath, B_FALSE, NULL)); 6190236143Smm } else { 6191236143Smm VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); 6192168404Spjd } 6193236143Smm zs->zs_do_init = B_FALSE; 6194168404Spjd 6195219089Spjd zs->zs_proc_start = gethrtime(); 6196236143Smm zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; 6197219089Spjd 6198219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6199236143Smm zi = 
&ztest_info[f]; 6200236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6201219089Spjd if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) 6202236143Smm zc->zc_next = UINT64_MAX; 6203168404Spjd else 6204236143Smm zc->zc_next = zs->zs_proc_start + 6205219089Spjd ztest_random(2 * zi->zi_interval[0] + 1); 6206168404Spjd } 6207168404Spjd 6208168404Spjd /* 6209168404Spjd * Run the tests in a loop. These tests include fault injection 6210168404Spjd * to verify that self-healing data works, and forced crashes 6211168404Spjd * to verify that we never lose on-disk consistency. 6212168404Spjd */ 6213219089Spjd while (gethrtime() < zs->zs_proc_stop) { 6214168404Spjd int status; 6215236143Smm boolean_t killed; 6216168404Spjd 6217168404Spjd /* 6218168404Spjd * Initialize the workload counters for each function. 6219168404Spjd */ 6220219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6221236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6222236143Smm zc->zc_count = 0; 6223236143Smm zc->zc_time = 0; 6224168404Spjd } 6225168404Spjd 6226209962Smm /* Set the allocation switch size */ 6227236143Smm zs->zs_metaslab_df_alloc_threshold = 6228236143Smm ztest_random(zs->zs_metaslab_sz / 4) + 1; 6229209962Smm 6230236143Smm if (!hasalt || ztest_random(2) == 0) { 6231236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6232236143Smm (void) printf("Executing newer ztest: %s\n", 6233236143Smm cmd); 6234168404Spjd } 6235236143Smm newer++; 6236236143Smm killed = exec_child(cmd, NULL, B_TRUE, &status); 6237236143Smm } else { 6238236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6239236143Smm (void) printf("Executing older ztest: %s\n", 6240236143Smm ztest_opts.zo_alt_ztest); 6241168404Spjd } 6242236143Smm older++; 6243236143Smm killed = exec_child(ztest_opts.zo_alt_ztest, 6244236143Smm ztest_opts.zo_alt_libpath, B_TRUE, &status); 6245168404Spjd } 6246168404Spjd 6247236143Smm if (killed) 6248236143Smm kills++; 6249168404Spjd iters++; 6250168404Spjd 6251236143Smm if (ztest_opts.zo_verbose >= 
1) { 6252168404Spjd hrtime_t now = gethrtime(); 6253168404Spjd 6254219089Spjd now = MIN(now, zs->zs_proc_stop); 6255219089Spjd print_time(zs->zs_proc_stop - now, timebuf); 6256168404Spjd nicenum(zs->zs_space, numbuf); 6257168404Spjd 6258168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 6259168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 6260168404Spjd iters, 6261168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 6262168404Spjd (u_longlong_t)zs->zs_enospc_count, 6263168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 6264168404Spjd numbuf, 6265219089Spjd 100.0 * (now - zs->zs_proc_start) / 6266236143Smm (ztest_opts.zo_time * NANOSEC), timebuf); 6267168404Spjd } 6268168404Spjd 6269236143Smm if (ztest_opts.zo_verbose >= 2) { 6270168404Spjd (void) printf("\nWorkload summary:\n\n"); 6271168404Spjd (void) printf("%7s %9s %s\n", 6272168404Spjd "Calls", "Time", "Function"); 6273168404Spjd (void) printf("%7s %9s %s\n", 6274168404Spjd "-----", "----", "--------"); 6275219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6276168404Spjd Dl_info dli; 6277168404Spjd 6278236143Smm zi = &ztest_info[f]; 6279236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6280236143Smm print_time(zc->zc_time, timebuf); 6281168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 6282168404Spjd (void) printf("%7llu %9s %s\n", 6283236143Smm (u_longlong_t)zc->zc_count, timebuf, 6284168404Spjd dli.dli_sname); 6285168404Spjd } 6286168404Spjd (void) printf("\n"); 6287168404Spjd } 6288168404Spjd 6289168404Spjd /* 6290219089Spjd * It's possible that we killed a child during a rename test, 6291219089Spjd * in which case we'll have a 'ztest_tmp' pool lying around 6292219089Spjd * instead of 'ztest'. Do a blind rename in case this happened. 
6293168404Spjd */ 6294219089Spjd kernel_init(FREAD); 6295236143Smm if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { 6296219089Spjd spa_close(spa, FTAG); 6297219089Spjd } else { 6298219089Spjd char tmpname[MAXNAMELEN]; 6299219089Spjd kernel_fini(); 6300219089Spjd kernel_init(FREAD | FWRITE); 6301219089Spjd (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", 6302236143Smm ztest_opts.zo_pool); 6303236143Smm (void) spa_rename(tmpname, ztest_opts.zo_pool); 6304219089Spjd } 6305168404Spjd kernel_fini(); 6306219089Spjd 6307236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6308168404Spjd } 6309168404Spjd 6310236143Smm if (ztest_opts.zo_verbose >= 1) { 6311236143Smm if (hasalt) { 6312236143Smm (void) printf("%d runs of older ztest: %s\n", older, 6313236143Smm ztest_opts.zo_alt_ztest); 6314236143Smm (void) printf("%d runs of newer ztest: %s\n", newer, 6315236143Smm cmd); 6316236143Smm } 6317168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 6318168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 6319168404Spjd } 6320168404Spjd 6321242845Sdelphij umem_free(cmd, MAXNAMELEN); 6322242845Sdelphij 6323168404Spjd return (0); 6324168404Spjd} 6325