ztest.c revision 251635
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23236143Smm * Copyright (c) 2012 by Delphix. All rights reserved. 24228103Smm * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 25236143Smm * Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 26168404Spjd */ 27168404Spjd 28168404Spjd/* 29168404Spjd * The objective of this program is to provide a DMU/ZAP/SPA stress test 30168404Spjd * that runs entirely in userland, is easy to use, and easy to extend. 31168404Spjd * 32168404Spjd * The overall design of the ztest program is as follows: 33168404Spjd * 34168404Spjd * (1) For each major functional area (e.g. adding vdevs to a pool, 35168404Spjd * creating and destroying datasets, reading and writing objects, etc) 36168404Spjd * we have a simple routine to test that functionality. 
These 37168404Spjd * individual routines do not have to do anything "stressful". 38168404Spjd * 39168404Spjd * (2) We turn these simple functionality tests into a stress test by 40168404Spjd * running them all in parallel, with as many threads as desired, 41168404Spjd * and spread across as many datasets, objects, and vdevs as desired. 42168404Spjd * 43168404Spjd * (3) While all this is happening, we inject faults into the pool to 44168404Spjd * verify that self-healing data really works. 45168404Spjd * 46168404Spjd * (4) Every time we open a dataset, we change its checksum and compression 47168404Spjd * functions. Thus even individual objects vary from block to block 48168404Spjd * in which checksum they use and whether they're compressed. 49168404Spjd * 50168404Spjd * (5) To verify that we never lose on-disk consistency after a crash, 51168404Spjd * we run the entire test in a child of the main process. 52168404Spjd * At random times, the child self-immolates with a SIGKILL. 53168404Spjd * This is the software equivalent of pulling the power cord. 54168404Spjd * The parent then runs the test again, using the existing 55236143Smm * storage pool, as many times as desired. If backwards compatibility 56236143Smm * testing is enabled ztest will sometimes run the "older" version 57236143Smm * of ztest after a SIGKILL. 58168404Spjd * 59168404Spjd * (6) To verify that we don't have future leaks or temporal incursions, 60168404Spjd * many of the functional tests record the transaction group number 61168404Spjd * as part of their data. When reading old data, they verify that 62168404Spjd * the transaction group number is less than the current, open txg. 63168404Spjd * If you add a new test, please do this if applicable. 64168404Spjd * 65168404Spjd * When run with no arguments, ztest runs for about five minutes and 66168404Spjd * produces no output if successful. To get a little bit of information, 67168404Spjd * specify -V. To get more information, specify -VV, and so on. 
68168404Spjd * 69168404Spjd * To turn this into an overnight stress test, use -T to specify run time. 70168404Spjd * 71168404Spjd * You can ask for more vdevs [-v], datasets [-d], or threads [-t] 72168404Spjd * to increase the pool capacity, fanout, and overall stress level. 73168404Spjd * 74236143Smm * Use the -k option to set the desired frequency of kills. 75236143Smm * 76236143Smm * When ztest invokes itself it passes all relevant information through a 77236143Smm * temporary file which is mmap-ed in the child process. This allows shared 78236143Smm * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always 79236143Smm * stored at offset 0 of this file and contains information on the size and 80236143Smm * number of shared structures in the file. The information stored in this file 81236143Smm * must remain backwards compatible with older versions of ztest so that 82236143Smm * ztest can invoke them during backwards compatibility testing (-B). 83168404Spjd */ 84168404Spjd 85168404Spjd#include <sys/zfs_context.h> 86168404Spjd#include <sys/spa.h> 87168404Spjd#include <sys/dmu.h> 88168404Spjd#include <sys/txg.h> 89209962Smm#include <sys/dbuf.h> 90168404Spjd#include <sys/zap.h> 91168404Spjd#include <sys/dmu_objset.h> 92168404Spjd#include <sys/poll.h> 93168404Spjd#include <sys/stat.h> 94168404Spjd#include <sys/time.h> 95168404Spjd#include <sys/wait.h> 96168404Spjd#include <sys/mman.h> 97168404Spjd#include <sys/resource.h> 98168404Spjd#include <sys/zio.h> 99168404Spjd#include <sys/zil.h> 100219089Spjd#include <sys/zil_impl.h> 101168404Spjd#include <sys/vdev_impl.h> 102185029Spjd#include <sys/vdev_file.h> 103168404Spjd#include <sys/spa_impl.h> 104219089Spjd#include <sys/metaslab_impl.h> 105168404Spjd#include <sys/dsl_prop.h> 106207910Smm#include <sys/dsl_dataset.h> 107248571Smm#include <sys/dsl_destroy.h> 108219089Spjd#include <sys/dsl_scan.h> 109219089Spjd#include <sys/zio_checksum.h> 110168404Spjd#include <sys/refcount.h> 111236884Smm#include 
<sys/zfeature.h> 112248571Smm#include <sys/dsl_userhold.h> 113168404Spjd#include <stdio.h> 114168404Spjd#include <stdio_ext.h> 115168404Spjd#include <stdlib.h> 116168404Spjd#include <unistd.h> 117168404Spjd#include <signal.h> 118168404Spjd#include <umem.h> 119168404Spjd#include <dlfcn.h> 120168404Spjd#include <ctype.h> 121168404Spjd#include <math.h> 122168404Spjd#include <errno.h> 123168404Spjd#include <sys/fs/zfs.h> 124219089Spjd#include <libnvpair.h> 125168404Spjd 126242845Sdelphijstatic int ztest_fd_data = -1; 127242845Sdelphijstatic int ztest_fd_rand = -1; 128168404Spjd 129236143Smmtypedef struct ztest_shared_hdr { 130236143Smm uint64_t zh_hdr_size; 131236143Smm uint64_t zh_opts_size; 132236143Smm uint64_t zh_size; 133236143Smm uint64_t zh_stats_size; 134236143Smm uint64_t zh_stats_count; 135236143Smm uint64_t zh_ds_size; 136236143Smm uint64_t zh_ds_count; 137236143Smm} ztest_shared_hdr_t; 138168404Spjd 139236143Smmstatic ztest_shared_hdr_t *ztest_shared_hdr; 140236143Smm 141236143Smmtypedef struct ztest_shared_opts { 142236143Smm char zo_pool[MAXNAMELEN]; 143236143Smm char zo_dir[MAXNAMELEN]; 144236143Smm char zo_alt_ztest[MAXNAMELEN]; 145236143Smm char zo_alt_libpath[MAXNAMELEN]; 146236143Smm uint64_t zo_vdevs; 147236143Smm uint64_t zo_vdevtime; 148236143Smm size_t zo_vdev_size; 149236143Smm int zo_ashift; 150236143Smm int zo_mirrors; 151236143Smm int zo_raidz; 152236143Smm int zo_raidz_parity; 153236143Smm int zo_datasets; 154236143Smm int zo_threads; 155236143Smm uint64_t zo_passtime; 156236143Smm uint64_t zo_killrate; 157236143Smm int zo_verbose; 158236143Smm int zo_init; 159236143Smm uint64_t zo_time; 160236143Smm uint64_t zo_maxloops; 161236143Smm uint64_t zo_metaslab_gang_bang; 162236143Smm} ztest_shared_opts_t; 163236143Smm 164236143Smmstatic const ztest_shared_opts_t ztest_opts_defaults = { 165236143Smm .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, 166236143Smm .zo_dir = { '/', 't', 'm', 'p', '\0' }, 167236143Smm .zo_alt_ztest = { '\0' }, 168236143Smm 
.zo_alt_libpath = { '\0' }, 169236143Smm .zo_vdevs = 5, 170236143Smm .zo_ashift = SPA_MINBLOCKSHIFT, 171236143Smm .zo_mirrors = 2, 172236143Smm .zo_raidz = 4, 173236143Smm .zo_raidz_parity = 1, 174236143Smm .zo_vdev_size = SPA_MINDEVSIZE, 175236143Smm .zo_datasets = 7, 176236143Smm .zo_threads = 23, 177236143Smm .zo_passtime = 60, /* 60 seconds */ 178236143Smm .zo_killrate = 70, /* 70% kill rate */ 179236143Smm .zo_verbose = 0, 180236143Smm .zo_init = 1, 181236143Smm .zo_time = 300, /* 5 minutes */ 182236143Smm .zo_maxloops = 50, /* max loops during spa_freeze() */ 183236143Smm .zo_metaslab_gang_bang = 32 << 10 184236143Smm}; 185236143Smm 186236143Smmextern uint64_t metaslab_gang_bang; 187236143Smmextern uint64_t metaslab_df_alloc_threshold; 188236143Smm 189236143Smmstatic ztest_shared_opts_t *ztest_shared_opts; 190236143Smmstatic ztest_shared_opts_t ztest_opts; 191236143Smm 192236143Smmtypedef struct ztest_shared_ds { 193236143Smm uint64_t zd_seq; 194236143Smm} ztest_shared_ds_t; 195236143Smm 196236143Smmstatic ztest_shared_ds_t *ztest_shared_ds; 197236143Smm#define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) 198236143Smm 199219089Spjd#define BT_MAGIC 0x123456789abcdefULL 200236143Smm#define MAXFAULTS() \ 201236143Smm (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) 202219089Spjd 203219089Spjdenum ztest_io_type { 204219089Spjd ZTEST_IO_WRITE_TAG, 205219089Spjd ZTEST_IO_WRITE_PATTERN, 206219089Spjd ZTEST_IO_WRITE_ZEROES, 207219089Spjd ZTEST_IO_TRUNCATE, 208219089Spjd ZTEST_IO_SETATTR, 209243524Smm ZTEST_IO_REWRITE, 210219089Spjd ZTEST_IO_TYPES 211219089Spjd}; 212219089Spjd 213185029Spjdtypedef struct ztest_block_tag { 214219089Spjd uint64_t bt_magic; 215185029Spjd uint64_t bt_objset; 216185029Spjd uint64_t bt_object; 217185029Spjd uint64_t bt_offset; 218219089Spjd uint64_t bt_gen; 219185029Spjd uint64_t bt_txg; 220219089Spjd uint64_t bt_crtxg; 221185029Spjd} ztest_block_tag_t; 222185029Spjd 223219089Spjdtypedef struct bufwad { 224219089Spjd uint64_t 
bw_index; 225219089Spjd uint64_t bw_txg; 226219089Spjd uint64_t bw_data; 227219089Spjd} bufwad_t; 228168404Spjd 229219089Spjd/* 230219089Spjd * XXX -- fix zfs range locks to be generic so we can use them here. 231219089Spjd */ 232219089Spjdtypedef enum { 233219089Spjd RL_READER, 234219089Spjd RL_WRITER, 235219089Spjd RL_APPEND 236219089Spjd} rl_type_t; 237168404Spjd 238219089Spjdtypedef struct rll { 239219089Spjd void *rll_writer; 240219089Spjd int rll_readers; 241219089Spjd mutex_t rll_lock; 242219089Spjd cond_t rll_cv; 243219089Spjd} rll_t; 244219089Spjd 245219089Spjdtypedef struct rl { 246219089Spjd uint64_t rl_object; 247219089Spjd uint64_t rl_offset; 248219089Spjd uint64_t rl_size; 249219089Spjd rll_t *rl_lock; 250219089Spjd} rl_t; 251219089Spjd 252219089Spjd#define ZTEST_RANGE_LOCKS 64 253219089Spjd#define ZTEST_OBJECT_LOCKS 64 254219089Spjd 255168404Spjd/* 256219089Spjd * Object descriptor. Used as a template for object lookup/create/remove. 257219089Spjd */ 258219089Spjdtypedef struct ztest_od { 259219089Spjd uint64_t od_dir; 260219089Spjd uint64_t od_object; 261219089Spjd dmu_object_type_t od_type; 262219089Spjd dmu_object_type_t od_crtype; 263219089Spjd uint64_t od_blocksize; 264219089Spjd uint64_t od_crblocksize; 265219089Spjd uint64_t od_gen; 266219089Spjd uint64_t od_crgen; 267219089Spjd char od_name[MAXNAMELEN]; 268219089Spjd} ztest_od_t; 269219089Spjd 270219089Spjd/* 271219089Spjd * Per-dataset state. 272219089Spjd */ 273219089Spjdtypedef struct ztest_ds { 274236143Smm ztest_shared_ds_t *zd_shared; 275219089Spjd objset_t *zd_os; 276224526Smm rwlock_t zd_zilog_lock; 277219089Spjd zilog_t *zd_zilog; 278219089Spjd ztest_od_t *zd_od; /* debugging aid */ 279219089Spjd char zd_name[MAXNAMELEN]; 280219089Spjd mutex_t zd_dirobj_lock; 281219089Spjd rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; 282219089Spjd rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; 283219089Spjd} ztest_ds_t; 284219089Spjd 285219089Spjd/* 286219089Spjd * Per-iteration state. 
287219089Spjd */ 288219089Spjdtypedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); 289219089Spjd 290219089Spjdtypedef struct ztest_info { 291219089Spjd ztest_func_t *zi_func; /* test function */ 292219089Spjd uint64_t zi_iters; /* iterations per execution */ 293219089Spjd uint64_t *zi_interval; /* execute every <interval> seconds */ 294219089Spjd} ztest_info_t; 295219089Spjd 296236143Smmtypedef struct ztest_shared_callstate { 297236143Smm uint64_t zc_count; /* per-pass count */ 298236143Smm uint64_t zc_time; /* per-pass time */ 299236143Smm uint64_t zc_next; /* next time to call this function */ 300236143Smm} ztest_shared_callstate_t; 301236143Smm 302236143Smmstatic ztest_shared_callstate_t *ztest_shared_callstate; 303236143Smm#define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) 304236143Smm 305219089Spjd/* 306168404Spjd * Note: these aren't static because we want dladdr() to work. 307168404Spjd */ 308168404Spjdztest_func_t ztest_dmu_read_write; 309168404Spjdztest_func_t ztest_dmu_write_parallel; 310168404Spjdztest_func_t ztest_dmu_object_alloc_free; 311219089Spjdztest_func_t ztest_dmu_commit_callbacks; 312168404Spjdztest_func_t ztest_zap; 313168404Spjdztest_func_t ztest_zap_parallel; 314219089Spjdztest_func_t ztest_zil_commit; 315224526Smmztest_func_t ztest_zil_remount; 316219089Spjdztest_func_t ztest_dmu_read_write_zcopy; 317168404Spjdztest_func_t ztest_dmu_objset_create_destroy; 318219089Spjdztest_func_t ztest_dmu_prealloc; 319219089Spjdztest_func_t ztest_fzap; 320168404Spjdztest_func_t ztest_dmu_snapshot_create_destroy; 321219089Spjdztest_func_t ztest_dsl_prop_get_set; 322219089Spjdztest_func_t ztest_spa_prop_get_set; 323168404Spjdztest_func_t ztest_spa_create_destroy; 324168404Spjdztest_func_t ztest_fault_inject; 325219089Spjdztest_func_t ztest_ddt_repair; 326219089Spjdztest_func_t ztest_dmu_snapshot_hold; 327185029Spjdztest_func_t ztest_spa_rename; 328219089Spjdztest_func_t ztest_scrub; 329219089Spjdztest_func_t 
ztest_dsl_dataset_promote_busy; 330168404Spjdztest_func_t ztest_vdev_attach_detach; 331168404Spjdztest_func_t ztest_vdev_LUN_growth; 332168404Spjdztest_func_t ztest_vdev_add_remove; 333185029Spjdztest_func_t ztest_vdev_aux_add_remove; 334219089Spjdztest_func_t ztest_split_pool; 335228103Smmztest_func_t ztest_reguid; 336243505Smmztest_func_t ztest_spa_upgrade; 337168404Spjd 338219089Spjduint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ 339219089Spjduint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ 340219089Spjduint64_t zopt_often = 1ULL * NANOSEC; /* every second */ 341219089Spjduint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ 342219089Spjduint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ 343168404Spjd 344168404Spjdztest_info_t ztest_info[] = { 345185029Spjd { ztest_dmu_read_write, 1, &zopt_always }, 346219089Spjd { ztest_dmu_write_parallel, 10, &zopt_always }, 347185029Spjd { ztest_dmu_object_alloc_free, 1, &zopt_always }, 348219089Spjd { ztest_dmu_commit_callbacks, 1, &zopt_always }, 349185029Spjd { ztest_zap, 30, &zopt_always }, 350185029Spjd { ztest_zap_parallel, 100, &zopt_always }, 351219089Spjd { ztest_split_pool, 1, &zopt_always }, 352219089Spjd { ztest_zil_commit, 1, &zopt_incessant }, 353224526Smm { ztest_zil_remount, 1, &zopt_sometimes }, 354219089Spjd { ztest_dmu_read_write_zcopy, 1, &zopt_often }, 355219089Spjd { ztest_dmu_objset_create_destroy, 1, &zopt_often }, 356219089Spjd { ztest_dsl_prop_get_set, 1, &zopt_often }, 357219089Spjd { ztest_spa_prop_get_set, 1, &zopt_sometimes }, 358219089Spjd#if 0 359219089Spjd { ztest_dmu_prealloc, 1, &zopt_sometimes }, 360219089Spjd#endif 361219089Spjd { ztest_fzap, 1, &zopt_sometimes }, 362219089Spjd { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, 363219089Spjd { ztest_spa_create_destroy, 1, &zopt_sometimes }, 364185029Spjd { ztest_fault_inject, 1, &zopt_sometimes }, 365219089Spjd { ztest_ddt_repair, 1, &zopt_sometimes }, 366219089Spjd { 
ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, 367228103Smm { ztest_reguid, 1, &zopt_sometimes }, 368185029Spjd { ztest_spa_rename, 1, &zopt_rarely }, 369219089Spjd { ztest_scrub, 1, &zopt_rarely }, 370243505Smm { ztest_spa_upgrade, 1, &zopt_rarely }, 371219089Spjd { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, 372248571Smm { ztest_vdev_attach_detach, 1, &zopt_sometimes }, 373185029Spjd { ztest_vdev_LUN_growth, 1, &zopt_rarely }, 374236143Smm { ztest_vdev_add_remove, 1, 375236143Smm &ztest_opts.zo_vdevtime }, 376236143Smm { ztest_vdev_aux_add_remove, 1, 377236143Smm &ztest_opts.zo_vdevtime }, 378168404Spjd}; 379168404Spjd 380168404Spjd#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) 381168404Spjd 382219089Spjd/* 383219089Spjd * The following struct is used to hold a list of uncalled commit callbacks. 384219089Spjd * The callbacks are ordered by txg number. 385219089Spjd */ 386219089Spjdtypedef struct ztest_cb_list { 387219089Spjd mutex_t zcl_callbacks_lock; 388219089Spjd list_t zcl_callbacks; 389219089Spjd} ztest_cb_list_t; 390168404Spjd 391168404Spjd/* 392168404Spjd * Stuff we need to share writably between parent and child. 
393168404Spjd */ 394168404Spjdtypedef struct ztest_shared { 395236143Smm boolean_t zs_do_init; 396219089Spjd hrtime_t zs_proc_start; 397219089Spjd hrtime_t zs_proc_stop; 398219089Spjd hrtime_t zs_thread_start; 399219089Spjd hrtime_t zs_thread_stop; 400219089Spjd hrtime_t zs_thread_kill; 401219089Spjd uint64_t zs_enospc_count; 402219089Spjd uint64_t zs_vdev_next_leaf; 403185029Spjd uint64_t zs_vdev_aux; 404168404Spjd uint64_t zs_alloc; 405168404Spjd uint64_t zs_space; 406219089Spjd uint64_t zs_splits; 407219089Spjd uint64_t zs_mirrors; 408236143Smm uint64_t zs_metaslab_sz; 409236143Smm uint64_t zs_metaslab_df_alloc_threshold; 410236143Smm uint64_t zs_guid; 411168404Spjd} ztest_shared_t; 412168404Spjd 413219089Spjd#define ID_PARALLEL -1ULL 414219089Spjd 415168404Spjdstatic char ztest_dev_template[] = "%s/%s.%llua"; 416185029Spjdstatic char ztest_aux_template[] = "%s/%s.%s.%llu"; 417219089Spjdztest_shared_t *ztest_shared; 418168404Spjd 419236143Smmstatic spa_t *ztest_spa = NULL; 420236143Smmstatic ztest_ds_t *ztest_ds; 421168404Spjd 422236143Smmstatic mutex_t ztest_vdev_lock; 423239620Smm 424239620Smm/* 425239620Smm * The ztest_name_lock protects the pool and dataset namespace used by 426239620Smm * the individual tests. To modify the namespace, consumers must grab 427239620Smm * this lock as writer. Grabbing the lock as reader will ensure that the 428239620Smm * namespace does not change while the lock is held. 
429239620Smm */ 430236143Smmstatic rwlock_t ztest_name_lock; 431236143Smm 432236143Smmstatic boolean_t ztest_dump_core = B_TRUE; 433185029Spjdstatic boolean_t ztest_exiting; 434168404Spjd 435219089Spjd/* Global commit callback list */ 436219089Spjdstatic ztest_cb_list_t zcl; 437219089Spjd 438219089Spjdenum ztest_object { 439219089Spjd ZTEST_META_DNODE = 0, 440219089Spjd ZTEST_DIROBJ, 441219089Spjd ZTEST_OBJECTS 442219089Spjd}; 443168404Spjd 444168676Spjdstatic void usage(boolean_t) __NORETURN; 445168498Spjd 446168404Spjd/* 447168404Spjd * These libumem hooks provide a reasonable set of defaults for the allocator's 448168404Spjd * debugging facilities. 449168404Spjd */ 450168404Spjdconst char * 451168404Spjd_umem_debug_init() 452168404Spjd{ 453168404Spjd return ("default,verbose"); /* $UMEM_DEBUG setting */ 454168404Spjd} 455168404Spjd 456168404Spjdconst char * 457168404Spjd_umem_logging_init(void) 458168404Spjd{ 459168404Spjd return ("fail,contents"); /* $UMEM_LOGGING setting */ 460168404Spjd} 461168404Spjd 462168404Spjd#define FATAL_MSG_SZ 1024 463168404Spjd 464168404Spjdchar *fatal_msg; 465168404Spjd 466168404Spjdstatic void 467168404Spjdfatal(int do_perror, char *message, ...) 
468168404Spjd{ 469168404Spjd va_list args; 470168404Spjd int save_errno = errno; 471168404Spjd char buf[FATAL_MSG_SZ]; 472168404Spjd 473168404Spjd (void) fflush(stdout); 474168404Spjd 475168404Spjd va_start(args, message); 476168404Spjd (void) sprintf(buf, "ztest: "); 477168404Spjd /* LINTED */ 478168404Spjd (void) vsprintf(buf + strlen(buf), message, args); 479168404Spjd va_end(args); 480168404Spjd if (do_perror) { 481168404Spjd (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), 482168404Spjd ": %s", strerror(save_errno)); 483168404Spjd } 484168404Spjd (void) fprintf(stderr, "%s\n", buf); 485168404Spjd fatal_msg = buf; /* to ease debugging */ 486168404Spjd if (ztest_dump_core) 487168404Spjd abort(); 488168404Spjd exit(3); 489168404Spjd} 490168404Spjd 491168404Spjdstatic int 492168404Spjdstr2shift(const char *buf) 493168404Spjd{ 494168404Spjd const char *ends = "BKMGTPEZ"; 495168404Spjd int i; 496168404Spjd 497168404Spjd if (buf[0] == '\0') 498168404Spjd return (0); 499168404Spjd for (i = 0; i < strlen(ends); i++) { 500168404Spjd if (toupper(buf[0]) == ends[i]) 501168404Spjd break; 502168404Spjd } 503168498Spjd if (i == strlen(ends)) { 504168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", 505168498Spjd buf); 506168498Spjd usage(B_FALSE); 507168498Spjd } 508168404Spjd if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { 509168404Spjd return (10*i); 510168404Spjd } 511168498Spjd (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); 512168498Spjd usage(B_FALSE); 513168498Spjd /* NOTREACHED */ 514168404Spjd} 515168404Spjd 516168404Spjdstatic uint64_t 517168404Spjdnicenumtoull(const char *buf) 518168404Spjd{ 519168404Spjd char *end; 520168404Spjd uint64_t val; 521168404Spjd 522168404Spjd val = strtoull(buf, &end, 0); 523168404Spjd if (end == buf) { 524168498Spjd (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); 525168498Spjd usage(B_FALSE); 526168404Spjd } else if (end[0] == '.') { 527168404Spjd 
double fval = strtod(buf, &end); 528168404Spjd fval *= pow(2, str2shift(end)); 529168498Spjd if (fval > UINT64_MAX) { 530168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 531168498Spjd buf); 532168498Spjd usage(B_FALSE); 533168498Spjd } 534168404Spjd val = (uint64_t)fval; 535168404Spjd } else { 536168404Spjd int shift = str2shift(end); 537168498Spjd if (shift >= 64 || (val << shift) >> shift != val) { 538168498Spjd (void) fprintf(stderr, "ztest: value too large: %s\n", 539168498Spjd buf); 540168498Spjd usage(B_FALSE); 541168498Spjd } 542168404Spjd val <<= shift; 543168404Spjd } 544168404Spjd return (val); 545168404Spjd} 546168404Spjd 547168404Spjdstatic void 548168498Spjdusage(boolean_t requested) 549168404Spjd{ 550236143Smm const ztest_shared_opts_t *zo = &ztest_opts_defaults; 551236143Smm 552168404Spjd char nice_vdev_size[10]; 553168404Spjd char nice_gang_bang[10]; 554168498Spjd FILE *fp = requested ? stdout : stderr; 555168404Spjd 556236143Smm nicenum(zo->zo_vdev_size, nice_vdev_size); 557236143Smm nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); 558168404Spjd 559168498Spjd (void) fprintf(fp, "Usage: %s\n" 560168404Spjd "\t[-v vdevs (default: %llu)]\n" 561168404Spjd "\t[-s size_of_each_vdev (default: %s)]\n" 562219089Spjd "\t[-a alignment_shift (default: %d)] use 0 for random\n" 563168404Spjd "\t[-m mirror_copies (default: %d)]\n" 564168404Spjd "\t[-r raidz_disks (default: %d)]\n" 565168404Spjd "\t[-R raidz_parity (default: %d)]\n" 566168404Spjd "\t[-d datasets (default: %d)]\n" 567168404Spjd "\t[-t threads (default: %d)]\n" 568168404Spjd "\t[-g gang_block_threshold (default: %s)]\n" 569219089Spjd "\t[-i init_count (default: %d)] initialize pool i times\n" 570219089Spjd "\t[-k kill_percentage (default: %llu%%)]\n" 571168404Spjd "\t[-p pool_name (default: %s)]\n" 572219089Spjd "\t[-f dir (default: %s)] file directory for vdev files\n" 573219089Spjd "\t[-V] verbose (use multiple times for ever more blather)\n" 574219089Spjd "\t[-E] use existing 
pool instead of creating new one\n" 575219089Spjd "\t[-T time (default: %llu sec)] total run time\n" 576219089Spjd "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" 577219089Spjd "\t[-P passtime (default: %llu sec)] time per pass\n" 578236143Smm "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" 579168498Spjd "\t[-h] (print help)\n" 580168404Spjd "", 581236143Smm zo->zo_pool, 582236143Smm (u_longlong_t)zo->zo_vdevs, /* -v */ 583185029Spjd nice_vdev_size, /* -s */ 584236143Smm zo->zo_ashift, /* -a */ 585236143Smm zo->zo_mirrors, /* -m */ 586236143Smm zo->zo_raidz, /* -r */ 587236143Smm zo->zo_raidz_parity, /* -R */ 588236143Smm zo->zo_datasets, /* -d */ 589236143Smm zo->zo_threads, /* -t */ 590185029Spjd nice_gang_bang, /* -g */ 591236143Smm zo->zo_init, /* -i */ 592236143Smm (u_longlong_t)zo->zo_killrate, /* -k */ 593236143Smm zo->zo_pool, /* -p */ 594236143Smm zo->zo_dir, /* -f */ 595236143Smm (u_longlong_t)zo->zo_time, /* -T */ 596236143Smm (u_longlong_t)zo->zo_maxloops, /* -F */ 597236143Smm (u_longlong_t)zo->zo_passtime); 598168498Spjd exit(requested ? 
0 : 1); 599168404Spjd} 600168404Spjd 601168404Spjdstatic void 602168404Spjdprocess_options(int argc, char **argv) 603168404Spjd{ 604236143Smm char *path; 605236143Smm ztest_shared_opts_t *zo = &ztest_opts; 606236143Smm 607168404Spjd int opt; 608168404Spjd uint64_t value; 609236143Smm char altdir[MAXNAMELEN] = { 0 }; 610168404Spjd 611236143Smm bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); 612168404Spjd 613168404Spjd while ((opt = getopt(argc, argv, 614236143Smm "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { 615168404Spjd value = 0; 616168404Spjd switch (opt) { 617185029Spjd case 'v': 618185029Spjd case 's': 619185029Spjd case 'a': 620185029Spjd case 'm': 621185029Spjd case 'r': 622185029Spjd case 'R': 623185029Spjd case 'd': 624185029Spjd case 't': 625185029Spjd case 'g': 626185029Spjd case 'i': 627185029Spjd case 'k': 628185029Spjd case 'T': 629185029Spjd case 'P': 630219089Spjd case 'F': 631168404Spjd value = nicenumtoull(optarg); 632168404Spjd } 633168404Spjd switch (opt) { 634185029Spjd case 'v': 635236143Smm zo->zo_vdevs = value; 636168404Spjd break; 637185029Spjd case 's': 638236143Smm zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); 639168404Spjd break; 640185029Spjd case 'a': 641236143Smm zo->zo_ashift = value; 642168404Spjd break; 643185029Spjd case 'm': 644236143Smm zo->zo_mirrors = value; 645168404Spjd break; 646185029Spjd case 'r': 647236143Smm zo->zo_raidz = MAX(1, value); 648168404Spjd break; 649185029Spjd case 'R': 650236143Smm zo->zo_raidz_parity = MIN(MAX(value, 1), 3); 651168404Spjd break; 652185029Spjd case 'd': 653236143Smm zo->zo_datasets = MAX(1, value); 654168404Spjd break; 655185029Spjd case 't': 656236143Smm zo->zo_threads = MAX(1, value); 657168404Spjd break; 658185029Spjd case 'g': 659236143Smm zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, 660236143Smm value); 661168404Spjd break; 662185029Spjd case 'i': 663236143Smm zo->zo_init = value; 664168404Spjd break; 665185029Spjd case 'k': 666236143Smm zo->zo_killrate = value; 
667168404Spjd break; 668185029Spjd case 'p': 669236143Smm (void) strlcpy(zo->zo_pool, optarg, 670236143Smm sizeof (zo->zo_pool)); 671168404Spjd break; 672185029Spjd case 'f': 673236143Smm path = realpath(optarg, NULL); 674236143Smm if (path == NULL) { 675236143Smm (void) fprintf(stderr, "error: %s: %s\n", 676236143Smm optarg, strerror(errno)); 677236143Smm usage(B_FALSE); 678236143Smm } else { 679236143Smm (void) strlcpy(zo->zo_dir, path, 680236143Smm sizeof (zo->zo_dir)); 681236143Smm } 682168404Spjd break; 683185029Spjd case 'V': 684236143Smm zo->zo_verbose++; 685168404Spjd break; 686185029Spjd case 'E': 687236143Smm zo->zo_init = 0; 688168404Spjd break; 689185029Spjd case 'T': 690236143Smm zo->zo_time = value; 691168404Spjd break; 692185029Spjd case 'P': 693236143Smm zo->zo_passtime = MAX(1, value); 694168404Spjd break; 695219089Spjd case 'F': 696236143Smm zo->zo_maxloops = MAX(1, value); 697219089Spjd break; 698236143Smm case 'B': 699236143Smm (void) strlcpy(altdir, optarg, sizeof (altdir)); 700236143Smm break; 701185029Spjd case 'h': 702168498Spjd usage(B_TRUE); 703168498Spjd break; 704185029Spjd case '?': 705185029Spjd default: 706168498Spjd usage(B_FALSE); 707168404Spjd break; 708168404Spjd } 709168404Spjd } 710168404Spjd 711236143Smm zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); 712168404Spjd 713236143Smm zo->zo_vdevtime = 714236143Smm (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : 715219089Spjd UINT64_MAX >> 2); 716236143Smm 717236143Smm if (strlen(altdir) > 0) { 718242845Sdelphij char *cmd; 719242845Sdelphij char *realaltdir; 720236143Smm char *bin; 721236143Smm char *ztest; 722236143Smm char *isa; 723236143Smm int isalen; 724236143Smm 725242845Sdelphij cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 726242845Sdelphij realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 727242845Sdelphij 728242845Sdelphij VERIFY(NULL != realpath(getexecname(), cmd)); 729236143Smm if (0 != access(altdir, F_OK)) { 730236143Smm ztest_dump_core = B_FALSE; 731236143Smm fatal(B_TRUE, "invalid alternate ztest path: %s", 732236143Smm altdir); 733236143Smm } 734236143Smm VERIFY(NULL != realpath(altdir, realaltdir)); 735236143Smm 736236143Smm /* 737236143Smm * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest". 738236143Smm * We want to extract <isa> to determine if we should use 739236143Smm * 32 or 64 bit binaries. 740236143Smm */ 741236143Smm bin = strstr(cmd, "/usr/bin/"); 742236143Smm ztest = strstr(bin, "/ztest"); 743236143Smm isa = bin + 9; 744236143Smm isalen = ztest - isa; 745236143Smm (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), 746236143Smm "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); 747236143Smm (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), 748236143Smm "%s/usr/lib/%.*s", realaltdir, isalen, isa); 749236143Smm 750236143Smm if (0 != access(zo->zo_alt_ztest, X_OK)) { 751236143Smm ztest_dump_core = B_FALSE; 752236143Smm fatal(B_TRUE, "invalid alternate ztest: %s", 753236143Smm zo->zo_alt_ztest); 754236143Smm } else if (0 != access(zo->zo_alt_libpath, X_OK)) { 755236143Smm ztest_dump_core = B_FALSE; 756236143Smm fatal(B_TRUE, "invalid alternate lib directory %s", 757236143Smm zo->zo_alt_libpath); 758236143Smm } 759242845Sdelphij 760242845Sdelphij umem_free(cmd, MAXPATHLEN); 761242845Sdelphij umem_free(realaltdir, MAXPATHLEN); 762236143Smm } 763168404Spjd} 764168404Spjd 
765219089Spjdstatic void 766219089Spjdztest_kill(ztest_shared_t *zs) 767219089Spjd{ 768236143Smm zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); 769236143Smm zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); 770219089Spjd (void) kill(getpid(), SIGKILL); 771219089Spjd} 772219089Spjd 773168404Spjdstatic uint64_t 774219089Spjdztest_random(uint64_t range) 775219089Spjd{ 776219089Spjd uint64_t r; 777219089Spjd 778242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 779242845Sdelphij 780219089Spjd if (range == 0) 781219089Spjd return (0); 782219089Spjd 783242845Sdelphij if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) 784219089Spjd fatal(1, "short read from /dev/urandom"); 785219089Spjd 786219089Spjd return (r % range); 787219089Spjd} 788219089Spjd 789219089Spjd/* ARGSUSED */ 790219089Spjdstatic void 791219089Spjdztest_record_enospc(const char *s) 792219089Spjd{ 793219089Spjd ztest_shared->zs_enospc_count++; 794219089Spjd} 795219089Spjd 796219089Spjdstatic uint64_t 797168404Spjdztest_get_ashift(void) 798168404Spjd{ 799236143Smm if (ztest_opts.zo_ashift == 0) 800168404Spjd return (SPA_MINBLOCKSHIFT + ztest_random(3)); 801236143Smm return (ztest_opts.zo_ashift); 802168404Spjd} 803168404Spjd 804168404Spjdstatic nvlist_t * 805243505Smmmake_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) 806168404Spjd{ 807185029Spjd char pathbuf[MAXPATHLEN]; 808168404Spjd uint64_t vdev; 809168404Spjd nvlist_t *file; 810168404Spjd 811185029Spjd if (ashift == 0) 812185029Spjd ashift = ztest_get_ashift(); 813168404Spjd 814185029Spjd if (path == NULL) { 815185029Spjd path = pathbuf; 816185029Spjd 817185029Spjd if (aux != NULL) { 818185029Spjd vdev = ztest_shared->zs_vdev_aux; 819236143Smm (void) snprintf(path, sizeof (pathbuf), 820236143Smm ztest_aux_template, ztest_opts.zo_dir, 821243505Smm pool == NULL ? 
ztest_opts.zo_pool : pool, 822243505Smm aux, vdev); 823185029Spjd } else { 824219089Spjd vdev = ztest_shared->zs_vdev_next_leaf++; 825236143Smm (void) snprintf(path, sizeof (pathbuf), 826236143Smm ztest_dev_template, ztest_opts.zo_dir, 827243505Smm pool == NULL ? ztest_opts.zo_pool : pool, vdev); 828185029Spjd } 829185029Spjd } 830185029Spjd 831185029Spjd if (size != 0) { 832185029Spjd int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); 833168404Spjd if (fd == -1) 834185029Spjd fatal(1, "can't open %s", path); 835168404Spjd if (ftruncate(fd, size) != 0) 836185029Spjd fatal(1, "can't ftruncate %s", path); 837168404Spjd (void) close(fd); 838168404Spjd } 839168404Spjd 840168404Spjd VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); 841168404Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); 842185029Spjd VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); 843168404Spjd VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); 844168404Spjd 845168404Spjd return (file); 846168404Spjd} 847168404Spjd 848168404Spjdstatic nvlist_t * 849243505Smmmake_vdev_raidz(char *path, char *aux, char *pool, size_t size, 850243505Smm uint64_t ashift, int r) 851168404Spjd{ 852168404Spjd nvlist_t *raidz, **child; 853168404Spjd int c; 854168404Spjd 855168404Spjd if (r < 2) 856243505Smm return (make_vdev_file(path, aux, pool, size, ashift)); 857168404Spjd child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); 858168404Spjd 859168404Spjd for (c = 0; c < r; c++) 860243505Smm child[c] = make_vdev_file(path, aux, pool, size, ashift); 861168404Spjd 862168404Spjd VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); 863168404Spjd VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, 864168404Spjd VDEV_TYPE_RAIDZ) == 0); 865168404Spjd VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, 866236143Smm ztest_opts.zo_raidz_parity) == 0); 867168404Spjd VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, 868168404Spjd child, r) == 0); 
869168404Spjd 870168404Spjd for (c = 0; c < r; c++) 871168404Spjd nvlist_free(child[c]); 872168404Spjd 873168404Spjd umem_free(child, r * sizeof (nvlist_t *)); 874168404Spjd 875168404Spjd return (raidz); 876168404Spjd} 877168404Spjd 878168404Spjdstatic nvlist_t * 879243505Smmmake_vdev_mirror(char *path, char *aux, char *pool, size_t size, 880243505Smm uint64_t ashift, int r, int m) 881168404Spjd{ 882168404Spjd nvlist_t *mirror, **child; 883168404Spjd int c; 884168404Spjd 885168404Spjd if (m < 1) 886243505Smm return (make_vdev_raidz(path, aux, pool, size, ashift, r)); 887168404Spjd 888168404Spjd child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); 889168404Spjd 890168404Spjd for (c = 0; c < m; c++) 891243505Smm child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); 892168404Spjd 893168404Spjd VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); 894168404Spjd VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, 895168404Spjd VDEV_TYPE_MIRROR) == 0); 896168404Spjd VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, 897168404Spjd child, m) == 0); 898168404Spjd 899168404Spjd for (c = 0; c < m; c++) 900168404Spjd nvlist_free(child[c]); 901168404Spjd 902168404Spjd umem_free(child, m * sizeof (nvlist_t *)); 903168404Spjd 904168404Spjd return (mirror); 905168404Spjd} 906168404Spjd 907168404Spjdstatic nvlist_t * 908243505Smmmake_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, 909243505Smm int log, int r, int m, int t) 910168404Spjd{ 911168404Spjd nvlist_t *root, **child; 912168404Spjd int c; 913168404Spjd 914168404Spjd ASSERT(t > 0); 915168404Spjd 916168404Spjd child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); 917168404Spjd 918185029Spjd for (c = 0; c < t; c++) { 919243505Smm child[c] = make_vdev_mirror(path, aux, pool, size, ashift, 920243505Smm r, m); 921185029Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, 922185029Spjd log) == 0); 923185029Spjd } 924168404Spjd 925168404Spjd VERIFY(nvlist_alloc(&root, 
NV_UNIQUE_NAME, 0) == 0); 926168404Spjd VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); 927185029Spjd VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, 928168404Spjd child, t) == 0); 929168404Spjd 930168404Spjd for (c = 0; c < t; c++) 931168404Spjd nvlist_free(child[c]); 932168404Spjd 933168404Spjd umem_free(child, t * sizeof (nvlist_t *)); 934168404Spjd 935168404Spjd return (root); 936168404Spjd} 937168404Spjd 938243505Smm/* 939243505Smm * Find a random spa version. Returns back a random spa version in the 940243505Smm * range [initial_version, SPA_VERSION_FEATURES]. 941243505Smm */ 942243505Smmstatic uint64_t 943243505Smmztest_random_spa_version(uint64_t initial_version) 944243505Smm{ 945243505Smm uint64_t version = initial_version; 946243505Smm 947243505Smm if (version <= SPA_VERSION_BEFORE_FEATURES) { 948243505Smm version = version + 949243505Smm ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); 950243505Smm } 951243505Smm 952243505Smm if (version > SPA_VERSION_BEFORE_FEATURES) 953243505Smm version = SPA_VERSION_FEATURES; 954243505Smm 955243505Smm ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 956243505Smm return (version); 957243505Smm} 958243505Smm 959219089Spjdstatic int 960219089Spjdztest_random_blocksize(void) 961219089Spjd{ 962219089Spjd return (1 << (SPA_MINBLOCKSHIFT + 963219089Spjd ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1))); 964219089Spjd} 965219089Spjd 966219089Spjdstatic int 967219089Spjdztest_random_ibshift(void) 968219089Spjd{ 969219089Spjd return (DN_MIN_INDBLKSHIFT + 970219089Spjd ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); 971219089Spjd} 972219089Spjd 973219089Spjdstatic uint64_t 974219089Spjdztest_random_vdev_top(spa_t *spa, boolean_t log_ok) 975219089Spjd{ 976219089Spjd uint64_t top; 977219089Spjd vdev_t *rvd = spa->spa_root_vdev; 978219089Spjd vdev_t *tvd; 979219089Spjd 980219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 981219089Spjd 
982219089Spjd do { 983219089Spjd top = ztest_random(rvd->vdev_children); 984219089Spjd tvd = rvd->vdev_child[top]; 985219089Spjd } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || 986219089Spjd tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); 987219089Spjd 988219089Spjd return (top); 989219089Spjd} 990219089Spjd 991219089Spjdstatic uint64_t 992219089Spjdztest_random_dsl_prop(zfs_prop_t prop) 993219089Spjd{ 994219089Spjd uint64_t value; 995219089Spjd 996219089Spjd do { 997219089Spjd value = zfs_prop_random_value(prop, ztest_random(-1ULL)); 998219089Spjd } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); 999219089Spjd 1000219089Spjd return (value); 1001219089Spjd} 1002219089Spjd 1003219089Spjdstatic int 1004219089Spjdztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, 1005219089Spjd boolean_t inherit) 1006219089Spjd{ 1007219089Spjd const char *propname = zfs_prop_to_name(prop); 1008219089Spjd const char *valname; 1009219089Spjd char setpoint[MAXPATHLEN]; 1010219089Spjd uint64_t curval; 1011219089Spjd int error; 1012219089Spjd 1013248571Smm error = dsl_prop_set_int(osname, propname, 1014248571Smm (inherit ? 
ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); 1015219089Spjd 1016219089Spjd if (error == ENOSPC) { 1017219089Spjd ztest_record_enospc(FTAG); 1018219089Spjd return (error); 1019219089Spjd } 1020240415Smm ASSERT0(error); 1021219089Spjd 1022248571Smm VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); 1023219089Spjd 1024236143Smm if (ztest_opts.zo_verbose >= 6) { 1025219089Spjd VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); 1026219089Spjd (void) printf("%s %s = %s at '%s'\n", 1027219089Spjd osname, propname, valname, setpoint); 1028219089Spjd } 1029219089Spjd 1030219089Spjd return (error); 1031219089Spjd} 1032219089Spjd 1033219089Spjdstatic int 1034236143Smmztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 1035219089Spjd{ 1036236143Smm spa_t *spa = ztest_spa; 1037219089Spjd nvlist_t *props = NULL; 1038219089Spjd int error; 1039219089Spjd 1040219089Spjd VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 1041219089Spjd VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); 1042219089Spjd 1043219089Spjd error = spa_prop_set(spa, props); 1044219089Spjd 1045219089Spjd nvlist_free(props); 1046219089Spjd 1047219089Spjd if (error == ENOSPC) { 1048219089Spjd ztest_record_enospc(FTAG); 1049219089Spjd return (error); 1050219089Spjd } 1051240415Smm ASSERT0(error); 1052219089Spjd 1053219089Spjd return (error); 1054219089Spjd} 1055219089Spjd 1056168404Spjdstatic void 1057219089Spjdztest_rll_init(rll_t *rll) 1058168404Spjd{ 1059219089Spjd rll->rll_writer = NULL; 1060219089Spjd rll->rll_readers = 0; 1061219089Spjd VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0); 1062219089Spjd VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0); 1063219089Spjd} 1064219089Spjd 1065219089Spjdstatic void 1066219089Spjdztest_rll_destroy(rll_t *rll) 1067219089Spjd{ 1068219089Spjd ASSERT(rll->rll_writer == NULL); 1069219089Spjd ASSERT(rll->rll_readers == 0); 1070219089Spjd VERIFY(_mutex_destroy(&rll->rll_lock) == 0); 
1071219089Spjd VERIFY(cond_destroy(&rll->rll_cv) == 0); 1072219089Spjd} 1073219089Spjd 1074219089Spjdstatic void 1075219089Spjdztest_rll_lock(rll_t *rll, rl_type_t type) 1076219089Spjd{ 1077219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1078219089Spjd 1079219089Spjd if (type == RL_READER) { 1080219089Spjd while (rll->rll_writer != NULL) 1081219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1082219089Spjd rll->rll_readers++; 1083219089Spjd } else { 1084219089Spjd while (rll->rll_writer != NULL || rll->rll_readers) 1085219089Spjd (void) cond_wait(&rll->rll_cv, &rll->rll_lock); 1086219089Spjd rll->rll_writer = curthread; 1087219089Spjd } 1088219089Spjd 1089219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1090219089Spjd} 1091219089Spjd 1092219089Spjdstatic void 1093219089Spjdztest_rll_unlock(rll_t *rll) 1094219089Spjd{ 1095219089Spjd VERIFY(mutex_lock(&rll->rll_lock) == 0); 1096219089Spjd 1097219089Spjd if (rll->rll_writer) { 1098219089Spjd ASSERT(rll->rll_readers == 0); 1099219089Spjd rll->rll_writer = NULL; 1100219089Spjd } else { 1101219089Spjd ASSERT(rll->rll_readers != 0); 1102219089Spjd ASSERT(rll->rll_writer == NULL); 1103219089Spjd rll->rll_readers--; 1104219089Spjd } 1105219089Spjd 1106219089Spjd if (rll->rll_writer == NULL && rll->rll_readers == 0) 1107219089Spjd VERIFY(cond_broadcast(&rll->rll_cv) == 0); 1108219089Spjd 1109219089Spjd VERIFY(mutex_unlock(&rll->rll_lock) == 0); 1110219089Spjd} 1111219089Spjd 1112219089Spjdstatic void 1113219089Spjdztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) 1114219089Spjd{ 1115219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1116219089Spjd 1117219089Spjd ztest_rll_lock(rll, type); 1118219089Spjd} 1119219089Spjd 1120219089Spjdstatic void 1121219089Spjdztest_object_unlock(ztest_ds_t *zd, uint64_t object) 1122219089Spjd{ 1123219089Spjd rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; 1124219089Spjd 1125219089Spjd ztest_rll_unlock(rll); 
1126219089Spjd} 1127219089Spjd 1128219089Spjdstatic rl_t * 1129219089Spjdztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, 1130219089Spjd uint64_t size, rl_type_t type) 1131219089Spjd{ 1132219089Spjd uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); 1133219089Spjd rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; 1134219089Spjd rl_t *rl; 1135219089Spjd 1136219089Spjd rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); 1137219089Spjd rl->rl_object = object; 1138219089Spjd rl->rl_offset = offset; 1139219089Spjd rl->rl_size = size; 1140219089Spjd rl->rl_lock = rll; 1141219089Spjd 1142219089Spjd ztest_rll_lock(rll, type); 1143219089Spjd 1144219089Spjd return (rl); 1145219089Spjd} 1146219089Spjd 1147219089Spjdstatic void 1148219089Spjdztest_range_unlock(rl_t *rl) 1149219089Spjd{ 1150219089Spjd rll_t *rll = rl->rl_lock; 1151219089Spjd 1152219089Spjd ztest_rll_unlock(rll); 1153219089Spjd 1154219089Spjd umem_free(rl, sizeof (*rl)); 1155219089Spjd} 1156219089Spjd 1157219089Spjdstatic void 1158236143Smmztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) 1159219089Spjd{ 1160219089Spjd zd->zd_os = os; 1161219089Spjd zd->zd_zilog = dmu_objset_zil(os); 1162236143Smm zd->zd_shared = szd; 1163219089Spjd dmu_objset_name(os, zd->zd_name); 1164219089Spjd 1165236143Smm if (zd->zd_shared != NULL) 1166236143Smm zd->zd_shared->zd_seq = 0; 1167236143Smm 1168224526Smm VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0); 1169219089Spjd VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0); 1170219089Spjd 1171219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1172219089Spjd ztest_rll_init(&zd->zd_object_lock[l]); 1173219089Spjd 1174219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1175219089Spjd ztest_rll_init(&zd->zd_range_lock[l]); 1176219089Spjd} 1177219089Spjd 1178219089Spjdstatic void 1179219089Spjdztest_zd_fini(ztest_ds_t *zd) 1180219089Spjd{ 1181219089Spjd VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) 
== 0); 1182219089Spjd 1183219089Spjd for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) 1184219089Spjd ztest_rll_destroy(&zd->zd_object_lock[l]); 1185219089Spjd 1186219089Spjd for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) 1187219089Spjd ztest_rll_destroy(&zd->zd_range_lock[l]); 1188219089Spjd} 1189219089Spjd 1190219089Spjd#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) 1191219089Spjd 1192219089Spjdstatic uint64_t 1193219089Spjdztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) 1194219089Spjd{ 1195219089Spjd uint64_t txg; 1196168404Spjd int error; 1197168404Spjd 1198219089Spjd /* 1199219089Spjd * Attempt to assign tx to some transaction group. 1200219089Spjd */ 1201219089Spjd error = dmu_tx_assign(tx, txg_how); 1202168404Spjd if (error) { 1203219089Spjd if (error == ERESTART) { 1204219089Spjd ASSERT(txg_how == TXG_NOWAIT); 1205219089Spjd dmu_tx_wait(tx); 1206219089Spjd } else { 1207219089Spjd ASSERT3U(error, ==, ENOSPC); 1208219089Spjd ztest_record_enospc(tag); 1209219089Spjd } 1210219089Spjd dmu_tx_abort(tx); 1211219089Spjd return (0); 1212168404Spjd } 1213219089Spjd txg = dmu_tx_get_txg(tx); 1214219089Spjd ASSERT(txg != 0); 1215219089Spjd return (txg); 1216168404Spjd} 1217168404Spjd 1218219089Spjdstatic void 1219219089Spjdztest_pattern_set(void *buf, uint64_t size, uint64_t value) 1220168404Spjd{ 1221219089Spjd uint64_t *ip = buf; 1222219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1223168404Spjd 1224219089Spjd while (ip < ip_end) 1225219089Spjd *ip++ = value; 1226219089Spjd} 1227168404Spjd 1228219089Spjdstatic boolean_t 1229219089Spjdztest_pattern_match(void *buf, uint64_t size, uint64_t value) 1230219089Spjd{ 1231219089Spjd uint64_t *ip = buf; 1232219089Spjd uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); 1233219089Spjd uint64_t diff = 0; 1234168404Spjd 1235219089Spjd while (ip < ip_end) 1236219089Spjd diff |= (value - *ip++); 1237219089Spjd 1238219089Spjd return (diff == 0); 
1239168404Spjd} 1240168404Spjd 1241219089Spjdstatic void 1242219089Spjdztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1243219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1244168404Spjd{ 1245219089Spjd bt->bt_magic = BT_MAGIC; 1246219089Spjd bt->bt_objset = dmu_objset_id(os); 1247219089Spjd bt->bt_object = object; 1248219089Spjd bt->bt_offset = offset; 1249219089Spjd bt->bt_gen = gen; 1250219089Spjd bt->bt_txg = txg; 1251219089Spjd bt->bt_crtxg = crtxg; 1252168404Spjd} 1253168404Spjd 1254219089Spjdstatic void 1255219089Spjdztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, 1256219089Spjd uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) 1257219089Spjd{ 1258219089Spjd ASSERT(bt->bt_magic == BT_MAGIC); 1259219089Spjd ASSERT(bt->bt_objset == dmu_objset_id(os)); 1260219089Spjd ASSERT(bt->bt_object == object); 1261219089Spjd ASSERT(bt->bt_offset == offset); 1262219089Spjd ASSERT(bt->bt_gen <= gen); 1263219089Spjd ASSERT(bt->bt_txg <= txg); 1264219089Spjd ASSERT(bt->bt_crtxg == crtxg); 1265219089Spjd} 1266219089Spjd 1267219089Spjdstatic ztest_block_tag_t * 1268219089Spjdztest_bt_bonus(dmu_buf_t *db) 1269219089Spjd{ 1270219089Spjd dmu_object_info_t doi; 1271219089Spjd ztest_block_tag_t *bt; 1272219089Spjd 1273219089Spjd dmu_object_info_from_db(db, &doi); 1274219089Spjd ASSERT3U(doi.doi_bonus_size, <=, db->db_size); 1275219089Spjd ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); 1276219089Spjd bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); 1277219089Spjd 1278219089Spjd return (bt); 1279219089Spjd} 1280219089Spjd 1281219089Spjd/* 1282219089Spjd * ZIL logging ops 1283219089Spjd */ 1284219089Spjd 1285219089Spjd#define lrz_type lr_mode 1286219089Spjd#define lrz_blocksize lr_uid 1287219089Spjd#define lrz_ibshift lr_gid 1288219089Spjd#define lrz_bonustype lr_rdev 1289219089Spjd#define lrz_bonuslen lr_crtime[1] 1290219089Spjd 1291219089Spjdstatic void 
1292219089Spjdztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) 1293219089Spjd{ 1294219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1295219089Spjd size_t namesize = strlen(name) + 1; 1296219089Spjd itx_t *itx; 1297219089Spjd 1298219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1299219089Spjd return; 1300219089Spjd 1301219089Spjd itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); 1302219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1303219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1304219089Spjd 1305219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1306219089Spjd} 1307219089Spjd 1308219089Spjdstatic void 1309219089Spjdztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) 1310219089Spjd{ 1311219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1312219089Spjd size_t namesize = strlen(name) + 1; 1313219089Spjd itx_t *itx; 1314219089Spjd 1315219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1316219089Spjd return; 1317219089Spjd 1318219089Spjd itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); 1319219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1320219089Spjd sizeof (*lr) + namesize - sizeof (lr_t)); 1321219089Spjd 1322219089Spjd itx->itx_oid = object; 1323219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1324219089Spjd} 1325219089Spjd 1326219089Spjdstatic void 1327219089Spjdztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) 1328219089Spjd{ 1329219089Spjd itx_t *itx; 1330219089Spjd itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); 1331219089Spjd 1332219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1333219089Spjd return; 1334219089Spjd 1335219089Spjd if (lr->lr_length > ZIL_MAX_LOG_DATA) 1336219089Spjd write_state = WR_INDIRECT; 1337219089Spjd 1338219089Spjd itx = zil_itx_create(TX_WRITE, 1339219089Spjd sizeof (*lr) + (write_state == WR_COPIED ? 
lr->lr_length : 0)); 1340219089Spjd 1341219089Spjd if (write_state == WR_COPIED && 1342219089Spjd dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, 1343219089Spjd ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { 1344219089Spjd zil_itx_destroy(itx); 1345219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1346219089Spjd write_state = WR_NEED_COPY; 1347219089Spjd } 1348219089Spjd itx->itx_private = zd; 1349219089Spjd itx->itx_wr_state = write_state; 1350219089Spjd itx->itx_sync = (ztest_random(8) == 0); 1351219089Spjd itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0); 1352219089Spjd 1353219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1354219089Spjd sizeof (*lr) - sizeof (lr_t)); 1355219089Spjd 1356219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1357219089Spjd} 1358219089Spjd 1359219089Spjdstatic void 1360219089Spjdztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) 1361219089Spjd{ 1362219089Spjd itx_t *itx; 1363219089Spjd 1364219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1365219089Spjd return; 1366219089Spjd 1367219089Spjd itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); 1368219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1369219089Spjd sizeof (*lr) - sizeof (lr_t)); 1370219089Spjd 1371219089Spjd itx->itx_sync = B_FALSE; 1372219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1373219089Spjd} 1374219089Spjd 1375219089Spjdstatic void 1376219089Spjdztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) 1377219089Spjd{ 1378219089Spjd itx_t *itx; 1379219089Spjd 1380219089Spjd if (zil_replaying(zd->zd_zilog, tx)) 1381219089Spjd return; 1382219089Spjd 1383219089Spjd itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); 1384219089Spjd bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, 1385219089Spjd sizeof (*lr) - sizeof (lr_t)); 1386219089Spjd 1387219089Spjd itx->itx_sync = B_FALSE; 1388219089Spjd zil_itx_assign(zd->zd_zilog, itx, tx); 1389219089Spjd} 1390219089Spjd 1391219089Spjd/* 1392219089Spjd 
* ZIL replay ops 1393219089Spjd */ 1394168404Spjdstatic int 1395219089Spjdztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) 1396168404Spjd{ 1397219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1398219089Spjd objset_t *os = zd->zd_os; 1399219089Spjd ztest_block_tag_t *bbt; 1400219089Spjd dmu_buf_t *db; 1401168404Spjd dmu_tx_t *tx; 1402219089Spjd uint64_t txg; 1403219089Spjd int error = 0; 1404168404Spjd 1405168404Spjd if (byteswap) 1406168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1407168404Spjd 1408219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1409219089Spjd ASSERT(name[0] != '\0'); 1410219089Spjd 1411168404Spjd tx = dmu_tx_create(os); 1412219089Spjd 1413219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); 1414219089Spjd 1415219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1416219089Spjd dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); 1417219089Spjd } else { 1418219089Spjd dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1419219089Spjd } 1420219089Spjd 1421219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1422219089Spjd if (txg == 0) 1423219089Spjd return (ENOSPC); 1424219089Spjd 1425219089Spjd ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); 1426219089Spjd 1427219089Spjd if (lr->lrz_type == DMU_OT_ZAP_OTHER) { 1428219089Spjd if (lr->lr_foid == 0) { 1429219089Spjd lr->lr_foid = zap_create(os, 1430219089Spjd lr->lrz_type, lr->lrz_bonustype, 1431219089Spjd lr->lrz_bonuslen, tx); 1432219089Spjd } else { 1433219089Spjd error = zap_create_claim(os, lr->lr_foid, 1434219089Spjd lr->lrz_type, lr->lrz_bonustype, 1435219089Spjd lr->lrz_bonuslen, tx); 1436219089Spjd } 1437219089Spjd } else { 1438219089Spjd if (lr->lr_foid == 0) { 1439219089Spjd lr->lr_foid = dmu_object_alloc(os, 1440219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1441219089Spjd lr->lrz_bonuslen, tx); 1442219089Spjd } else { 1443219089Spjd error = dmu_object_claim(os, lr->lr_foid, 1444219089Spjd lr->lrz_type, 0, lr->lrz_bonustype, 1445219089Spjd 
lr->lrz_bonuslen, tx); 1446219089Spjd } 1447219089Spjd } 1448219089Spjd 1449168404Spjd if (error) { 1450219089Spjd ASSERT3U(error, ==, EEXIST); 1451219089Spjd ASSERT(zd->zd_zilog->zl_replay); 1452219089Spjd dmu_tx_commit(tx); 1453168404Spjd return (error); 1454168404Spjd } 1455168404Spjd 1456219089Spjd ASSERT(lr->lr_foid != 0); 1457219089Spjd 1458219089Spjd if (lr->lrz_type != DMU_OT_ZAP_OTHER) 1459219089Spjd VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, 1460219089Spjd lr->lrz_blocksize, lr->lrz_ibshift, tx)); 1461219089Spjd 1462219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1463219089Spjd bbt = ztest_bt_bonus(db); 1464219089Spjd dmu_buf_will_dirty(db, tx); 1465219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); 1466219089Spjd dmu_buf_rele(db, FTAG); 1467219089Spjd 1468219089Spjd VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, 1469219089Spjd &lr->lr_foid, tx)); 1470219089Spjd 1471219089Spjd (void) ztest_log_create(zd, tx, lr); 1472219089Spjd 1473168404Spjd dmu_tx_commit(tx); 1474168404Spjd 1475219089Spjd return (0); 1476219089Spjd} 1477219089Spjd 1478219089Spjdstatic int 1479219089Spjdztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) 1480219089Spjd{ 1481219089Spjd char *name = (void *)(lr + 1); /* name follows lr */ 1482219089Spjd objset_t *os = zd->zd_os; 1483219089Spjd dmu_object_info_t doi; 1484219089Spjd dmu_tx_t *tx; 1485219089Spjd uint64_t object, txg; 1486219089Spjd 1487219089Spjd if (byteswap) 1488219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1489219089Spjd 1490219089Spjd ASSERT(lr->lr_doid == ZTEST_DIROBJ); 1491219089Spjd ASSERT(name[0] != '\0'); 1492219089Spjd 1493219089Spjd VERIFY3U(0, ==, 1494219089Spjd zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); 1495219089Spjd ASSERT(object != 0); 1496219089Spjd 1497219089Spjd ztest_object_lock(zd, object, RL_WRITER); 1498219089Spjd 1499219089Spjd VERIFY3U(0, ==, dmu_object_info(os, 
object, &doi)); 1500219089Spjd 1501219089Spjd tx = dmu_tx_create(os); 1502219089Spjd 1503219089Spjd dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); 1504219089Spjd dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); 1505219089Spjd 1506219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1507219089Spjd if (txg == 0) { 1508219089Spjd ztest_object_unlock(zd, object); 1509219089Spjd return (ENOSPC); 1510168404Spjd } 1511168404Spjd 1512219089Spjd if (doi.doi_type == DMU_OT_ZAP_OTHER) { 1513219089Spjd VERIFY3U(0, ==, zap_destroy(os, object, tx)); 1514219089Spjd } else { 1515219089Spjd VERIFY3U(0, ==, dmu_object_free(os, object, tx)); 1516219089Spjd } 1517219089Spjd 1518219089Spjd VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); 1519219089Spjd 1520219089Spjd (void) ztest_log_remove(zd, tx, lr, object); 1521219089Spjd 1522219089Spjd dmu_tx_commit(tx); 1523219089Spjd 1524219089Spjd ztest_object_unlock(zd, object); 1525219089Spjd 1526219089Spjd return (0); 1527168404Spjd} 1528168404Spjd 1529168404Spjdstatic int 1530219089Spjdztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) 1531168404Spjd{ 1532219089Spjd objset_t *os = zd->zd_os; 1533219089Spjd void *data = lr + 1; /* data follows lr */ 1534219089Spjd uint64_t offset, length; 1535219089Spjd ztest_block_tag_t *bt = data; 1536219089Spjd ztest_block_tag_t *bbt; 1537219089Spjd uint64_t gen, txg, lrtxg, crtxg; 1538219089Spjd dmu_object_info_t doi; 1539168404Spjd dmu_tx_t *tx; 1540219089Spjd dmu_buf_t *db; 1541219089Spjd arc_buf_t *abuf = NULL; 1542219089Spjd rl_t *rl; 1543168404Spjd 1544168404Spjd if (byteswap) 1545168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1546168404Spjd 1547219089Spjd offset = lr->lr_offset; 1548219089Spjd length = lr->lr_length; 1549219089Spjd 1550219089Spjd /* If it's a dmu_sync() block, write the whole block */ 1551219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 1552219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 1553219089Spjd if (length < 
blocksize) { 1554219089Spjd offset -= offset % blocksize; 1555219089Spjd length = blocksize; 1556219089Spjd } 1557219089Spjd } 1558219089Spjd 1559219089Spjd if (bt->bt_magic == BSWAP_64(BT_MAGIC)) 1560219089Spjd byteswap_uint64_array(bt, sizeof (*bt)); 1561219089Spjd 1562219089Spjd if (bt->bt_magic != BT_MAGIC) 1563219089Spjd bt = NULL; 1564219089Spjd 1565219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1566219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); 1567219089Spjd 1568219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1569219089Spjd 1570219089Spjd dmu_object_info_from_db(db, &doi); 1571219089Spjd 1572219089Spjd bbt = ztest_bt_bonus(db); 1573219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1574219089Spjd gen = bbt->bt_gen; 1575219089Spjd crtxg = bbt->bt_crtxg; 1576219089Spjd lrtxg = lr->lr_common.lrc_txg; 1577219089Spjd 1578168404Spjd tx = dmu_tx_create(os); 1579219089Spjd 1580219089Spjd dmu_tx_hold_write(tx, lr->lr_foid, offset, length); 1581219089Spjd 1582219089Spjd if (ztest_random(8) == 0 && length == doi.doi_data_block_size && 1583219089Spjd P2PHASE(offset, length) == 0) 1584219089Spjd abuf = dmu_request_arcbuf(db, length); 1585219089Spjd 1586219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1587219089Spjd if (txg == 0) { 1588219089Spjd if (abuf != NULL) 1589219089Spjd dmu_return_arcbuf(abuf); 1590219089Spjd dmu_buf_rele(db, FTAG); 1591219089Spjd ztest_range_unlock(rl); 1592219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1593219089Spjd return (ENOSPC); 1594168404Spjd } 1595168404Spjd 1596219089Spjd if (bt != NULL) { 1597219089Spjd /* 1598219089Spjd * Usually, verify the old data before writing new data -- 1599219089Spjd * but not always, because we also want to verify correct 1600219089Spjd * behavior when the data was not recently read into cache. 
1601219089Spjd */ 1602219089Spjd ASSERT(offset % doi.doi_data_block_size == 0); 1603219089Spjd if (ztest_random(4) != 0) { 1604219089Spjd int prefetch = ztest_random(2) ? 1605219089Spjd DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; 1606219089Spjd ztest_block_tag_t rbt; 1607219089Spjd 1608219089Spjd VERIFY(dmu_read(os, lr->lr_foid, offset, 1609219089Spjd sizeof (rbt), &rbt, prefetch) == 0); 1610219089Spjd if (rbt.bt_magic == BT_MAGIC) { 1611219089Spjd ztest_bt_verify(&rbt, os, lr->lr_foid, 1612219089Spjd offset, gen, txg, crtxg); 1613219089Spjd } 1614219089Spjd } 1615219089Spjd 1616219089Spjd /* 1617219089Spjd * Writes can appear to be newer than the bonus buffer because 1618219089Spjd * the ztest_get_data() callback does a dmu_read() of the 1619219089Spjd * open-context data, which may be different than the data 1620219089Spjd * as it was when the write was generated. 1621219089Spjd */ 1622219089Spjd if (zd->zd_zilog->zl_replay) { 1623219089Spjd ztest_bt_verify(bt, os, lr->lr_foid, offset, 1624219089Spjd MAX(gen, bt->bt_gen), MAX(txg, lrtxg), 1625219089Spjd bt->bt_crtxg); 1626219089Spjd } 1627219089Spjd 1628219089Spjd /* 1629219089Spjd * Set the bt's gen/txg to the bonus buffer's gen/txg 1630219089Spjd * so that all of the usual ASSERTs will work. 
1631219089Spjd */ 1632219089Spjd ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); 1633219089Spjd } 1634219089Spjd 1635219089Spjd if (abuf == NULL) { 1636219089Spjd dmu_write(os, lr->lr_foid, offset, length, data, tx); 1637219089Spjd } else { 1638219089Spjd bcopy(data, abuf->b_data, length); 1639219089Spjd dmu_assign_arcbuf(db, offset, abuf, tx); 1640219089Spjd } 1641219089Spjd 1642219089Spjd (void) ztest_log_write(zd, tx, lr); 1643219089Spjd 1644219089Spjd dmu_buf_rele(db, FTAG); 1645219089Spjd 1646168404Spjd dmu_tx_commit(tx); 1647168404Spjd 1648219089Spjd ztest_range_unlock(rl); 1649219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1650219089Spjd 1651219089Spjd return (0); 1652168404Spjd} 1653168404Spjd 1654219089Spjdstatic int 1655219089Spjdztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) 1656219089Spjd{ 1657219089Spjd objset_t *os = zd->zd_os; 1658219089Spjd dmu_tx_t *tx; 1659219089Spjd uint64_t txg; 1660219089Spjd rl_t *rl; 1661219089Spjd 1662219089Spjd if (byteswap) 1663219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1664219089Spjd 1665219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_READER); 1666219089Spjd rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, 1667219089Spjd RL_WRITER); 1668219089Spjd 1669219089Spjd tx = dmu_tx_create(os); 1670219089Spjd 1671219089Spjd dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); 1672219089Spjd 1673219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1674219089Spjd if (txg == 0) { 1675219089Spjd ztest_range_unlock(rl); 1676219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1677219089Spjd return (ENOSPC); 1678219089Spjd } 1679219089Spjd 1680219089Spjd VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, 1681219089Spjd lr->lr_length, tx) == 0); 1682219089Spjd 1683219089Spjd (void) ztest_log_truncate(zd, tx, lr); 1684219089Spjd 1685219089Spjd dmu_tx_commit(tx); 1686219089Spjd 1687219089Spjd ztest_range_unlock(rl); 1688219089Spjd 
ztest_object_unlock(zd, lr->lr_foid); 1689219089Spjd 1690219089Spjd return (0); 1691219089Spjd} 1692219089Spjd 1693219089Spjdstatic int 1694219089Spjdztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) 1695219089Spjd{ 1696219089Spjd objset_t *os = zd->zd_os; 1697219089Spjd dmu_tx_t *tx; 1698219089Spjd dmu_buf_t *db; 1699219089Spjd ztest_block_tag_t *bbt; 1700219089Spjd uint64_t txg, lrtxg, crtxg; 1701219089Spjd 1702219089Spjd if (byteswap) 1703219089Spjd byteswap_uint64_array(lr, sizeof (*lr)); 1704219089Spjd 1705219089Spjd ztest_object_lock(zd, lr->lr_foid, RL_WRITER); 1706219089Spjd 1707219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 1708219089Spjd 1709219089Spjd tx = dmu_tx_create(os); 1710219089Spjd dmu_tx_hold_bonus(tx, lr->lr_foid); 1711219089Spjd 1712219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 1713219089Spjd if (txg == 0) { 1714219089Spjd dmu_buf_rele(db, FTAG); 1715219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1716219089Spjd return (ENOSPC); 1717219089Spjd } 1718219089Spjd 1719219089Spjd bbt = ztest_bt_bonus(db); 1720219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1721219089Spjd crtxg = bbt->bt_crtxg; 1722219089Spjd lrtxg = lr->lr_common.lrc_txg; 1723219089Spjd 1724219089Spjd if (zd->zd_zilog->zl_replay) { 1725219089Spjd ASSERT(lr->lr_size != 0); 1726219089Spjd ASSERT(lr->lr_mode != 0); 1727219089Spjd ASSERT(lrtxg != 0); 1728219089Spjd } else { 1729219089Spjd /* 1730219089Spjd * Randomly change the size and increment the generation. 1731219089Spjd */ 1732219089Spjd lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * 1733219089Spjd sizeof (*bbt); 1734219089Spjd lr->lr_mode = bbt->bt_gen + 1; 1735219089Spjd ASSERT(lrtxg == 0); 1736219089Spjd } 1737219089Spjd 1738219089Spjd /* 1739219089Spjd * Verify that the current bonus buffer is not newer than our txg. 
1740219089Spjd */ 1741219089Spjd ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, 1742219089Spjd MAX(txg, lrtxg), crtxg); 1743219089Spjd 1744219089Spjd dmu_buf_will_dirty(db, tx); 1745219089Spjd 1746219089Spjd ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); 1747219089Spjd ASSERT3U(lr->lr_size, <=, db->db_size); 1748240415Smm VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); 1749219089Spjd bbt = ztest_bt_bonus(db); 1750219089Spjd 1751219089Spjd ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); 1752219089Spjd 1753219089Spjd dmu_buf_rele(db, FTAG); 1754219089Spjd 1755219089Spjd (void) ztest_log_setattr(zd, tx, lr); 1756219089Spjd 1757219089Spjd dmu_tx_commit(tx); 1758219089Spjd 1759219089Spjd ztest_object_unlock(zd, lr->lr_foid); 1760219089Spjd 1761219089Spjd return (0); 1762219089Spjd} 1763219089Spjd 1764168404Spjdzil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { 1765168404Spjd NULL, /* 0 no such transaction type */ 1766168404Spjd ztest_replay_create, /* TX_CREATE */ 1767168404Spjd NULL, /* TX_MKDIR */ 1768168404Spjd NULL, /* TX_MKXATTR */ 1769168404Spjd NULL, /* TX_SYMLINK */ 1770168404Spjd ztest_replay_remove, /* TX_REMOVE */ 1771168404Spjd NULL, /* TX_RMDIR */ 1772168404Spjd NULL, /* TX_LINK */ 1773168404Spjd NULL, /* TX_RENAME */ 1774219089Spjd ztest_replay_write, /* TX_WRITE */ 1775219089Spjd ztest_replay_truncate, /* TX_TRUNCATE */ 1776219089Spjd ztest_replay_setattr, /* TX_SETATTR */ 1777168404Spjd NULL, /* TX_ACL */ 1778209962Smm NULL, /* TX_CREATE_ACL */ 1779209962Smm NULL, /* TX_CREATE_ATTR */ 1780209962Smm NULL, /* TX_CREATE_ACL_ATTR */ 1781209962Smm NULL, /* TX_MKDIR_ACL */ 1782209962Smm NULL, /* TX_MKDIR_ATTR */ 1783209962Smm NULL, /* TX_MKDIR_ACL_ATTR */ 1784209962Smm NULL, /* TX_WRITE2 */ 1785168404Spjd}; 1786168404Spjd 1787168404Spjd/* 1788219089Spjd * ZIL get_data callbacks 1789219089Spjd */ 1790219089Spjd 1791219089Spjdstatic void 1792219089Spjdztest_get_done(zgd_t *zgd, int error) 1793219089Spjd{ 1794219089Spjd ztest_ds_t 
*zd = zgd->zgd_private; 1795219089Spjd uint64_t object = zgd->zgd_rl->rl_object; 1796219089Spjd 1797219089Spjd if (zgd->zgd_db) 1798219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1799219089Spjd 1800219089Spjd ztest_range_unlock(zgd->zgd_rl); 1801219089Spjd ztest_object_unlock(zd, object); 1802219089Spjd 1803219089Spjd if (error == 0 && zgd->zgd_bp) 1804219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1805219089Spjd 1806219089Spjd umem_free(zgd, sizeof (*zgd)); 1807219089Spjd} 1808219089Spjd 1809219089Spjdstatic int 1810219089Spjdztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1811219089Spjd{ 1812219089Spjd ztest_ds_t *zd = arg; 1813219089Spjd objset_t *os = zd->zd_os; 1814219089Spjd uint64_t object = lr->lr_foid; 1815219089Spjd uint64_t offset = lr->lr_offset; 1816219089Spjd uint64_t size = lr->lr_length; 1817219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1818219089Spjd uint64_t txg = lr->lr_common.lrc_txg; 1819219089Spjd uint64_t crtxg; 1820219089Spjd dmu_object_info_t doi; 1821219089Spjd dmu_buf_t *db; 1822219089Spjd zgd_t *zgd; 1823219089Spjd int error; 1824219089Spjd 1825219089Spjd ztest_object_lock(zd, object, RL_READER); 1826219089Spjd error = dmu_bonus_hold(os, object, FTAG, &db); 1827219089Spjd if (error) { 1828219089Spjd ztest_object_unlock(zd, object); 1829219089Spjd return (error); 1830219089Spjd } 1831219089Spjd 1832219089Spjd crtxg = ztest_bt_bonus(db)->bt_crtxg; 1833219089Spjd 1834219089Spjd if (crtxg == 0 || crtxg > txg) { 1835219089Spjd dmu_buf_rele(db, FTAG); 1836219089Spjd ztest_object_unlock(zd, object); 1837219089Spjd return (ENOENT); 1838219089Spjd } 1839219089Spjd 1840219089Spjd dmu_object_info_from_db(db, &doi); 1841219089Spjd dmu_buf_rele(db, FTAG); 1842219089Spjd db = NULL; 1843219089Spjd 1844219089Spjd zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); 1845219089Spjd zgd->zgd_zilog = zd->zd_zilog; 1846219089Spjd zgd->zgd_private = zd; 1847219089Spjd 1848219089Spjd if (buf != NULL) { /* immediate write */ 1849219089Spjd 
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1850219089Spjd RL_READER); 1851219089Spjd 1852219089Spjd error = dmu_read(os, object, offset, size, buf, 1853219089Spjd DMU_READ_NO_PREFETCH); 1854219089Spjd ASSERT(error == 0); 1855219089Spjd } else { 1856219089Spjd size = doi.doi_data_block_size; 1857219089Spjd if (ISP2(size)) { 1858219089Spjd offset = P2ALIGN(offset, size); 1859219089Spjd } else { 1860219089Spjd ASSERT(offset < size); 1861219089Spjd offset = 0; 1862219089Spjd } 1863219089Spjd 1864219089Spjd zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, 1865219089Spjd RL_READER); 1866219089Spjd 1867219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1868219089Spjd DMU_READ_NO_PREFETCH); 1869219089Spjd 1870219089Spjd if (error == 0) { 1871243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1872243524Smm if (obp) { 1873243524Smm ASSERT(BP_IS_HOLE(bp)); 1874243524Smm *bp = *obp; 1875243524Smm } 1876243524Smm 1877219089Spjd zgd->zgd_db = db; 1878219089Spjd zgd->zgd_bp = bp; 1879219089Spjd 1880219089Spjd ASSERT(db->db_offset == offset); 1881219089Spjd ASSERT(db->db_size == size); 1882219089Spjd 1883219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1884219089Spjd ztest_get_done, zgd); 1885219089Spjd 1886219089Spjd if (error == 0) 1887219089Spjd return (0); 1888219089Spjd } 1889219089Spjd } 1890219089Spjd 1891219089Spjd ztest_get_done(zgd, error); 1892219089Spjd 1893219089Spjd return (error); 1894219089Spjd} 1895219089Spjd 1896219089Spjdstatic void * 1897219089Spjdztest_lr_alloc(size_t lrsize, char *name) 1898219089Spjd{ 1899219089Spjd char *lr; 1900219089Spjd size_t namesize = name ? 
strlen(name) + 1 : 0; 1901219089Spjd 1902219089Spjd lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); 1903219089Spjd 1904219089Spjd if (name) 1905219089Spjd bcopy(name, lr + lrsize, namesize); 1906219089Spjd 1907219089Spjd return (lr); 1908219089Spjd} 1909219089Spjd 1910219089Spjdvoid 1911219089Spjdztest_lr_free(void *lr, size_t lrsize, char *name) 1912219089Spjd{ 1913219089Spjd size_t namesize = name ? strlen(name) + 1 : 0; 1914219089Spjd 1915219089Spjd umem_free(lr, lrsize + namesize); 1916219089Spjd} 1917219089Spjd 1918219089Spjd/* 1919219089Spjd * Lookup a bunch of objects. Returns the number of objects not found. 1920219089Spjd */ 1921219089Spjdstatic int 1922219089Spjdztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) 1923219089Spjd{ 1924219089Spjd int missing = 0; 1925219089Spjd int error; 1926219089Spjd 1927219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1928219089Spjd 1929219089Spjd for (int i = 0; i < count; i++, od++) { 1930219089Spjd od->od_object = 0; 1931219089Spjd error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, 1932219089Spjd sizeof (uint64_t), 1, &od->od_object); 1933219089Spjd if (error) { 1934219089Spjd ASSERT(error == ENOENT); 1935219089Spjd ASSERT(od->od_object == 0); 1936219089Spjd missing++; 1937219089Spjd } else { 1938219089Spjd dmu_buf_t *db; 1939219089Spjd ztest_block_tag_t *bbt; 1940219089Spjd dmu_object_info_t doi; 1941219089Spjd 1942219089Spjd ASSERT(od->od_object != 0); 1943219089Spjd ASSERT(missing == 0); /* there should be no gaps */ 1944219089Spjd 1945219089Spjd ztest_object_lock(zd, od->od_object, RL_READER); 1946219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, 1947219089Spjd od->od_object, FTAG, &db)); 1948219089Spjd dmu_object_info_from_db(db, &doi); 1949219089Spjd bbt = ztest_bt_bonus(db); 1950219089Spjd ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); 1951219089Spjd od->od_type = doi.doi_type; 1952219089Spjd od->od_blocksize = doi.doi_data_block_size; 1953219089Spjd od->od_gen = bbt->bt_gen; 1954219089Spjd 
dmu_buf_rele(db, FTAG); 1955219089Spjd ztest_object_unlock(zd, od->od_object); 1956219089Spjd } 1957219089Spjd } 1958219089Spjd 1959219089Spjd return (missing); 1960219089Spjd} 1961219089Spjd 1962219089Spjdstatic int 1963219089Spjdztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) 1964219089Spjd{ 1965219089Spjd int missing = 0; 1966219089Spjd 1967219089Spjd ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 1968219089Spjd 1969219089Spjd for (int i = 0; i < count; i++, od++) { 1970219089Spjd if (missing) { 1971219089Spjd od->od_object = 0; 1972219089Spjd missing++; 1973219089Spjd continue; 1974219089Spjd } 1975219089Spjd 1976219089Spjd lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 1977219089Spjd 1978219089Spjd lr->lr_doid = od->od_dir; 1979219089Spjd lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ 1980219089Spjd lr->lrz_type = od->od_crtype; 1981219089Spjd lr->lrz_blocksize = od->od_crblocksize; 1982219089Spjd lr->lrz_ibshift = ztest_random_ibshift(); 1983219089Spjd lr->lrz_bonustype = DMU_OT_UINT64_OTHER; 1984219089Spjd lr->lrz_bonuslen = dmu_bonus_max(); 1985219089Spjd lr->lr_gen = od->od_crgen; 1986219089Spjd lr->lr_crtime[0] = time(NULL); 1987219089Spjd 1988219089Spjd if (ztest_replay_create(zd, lr, B_FALSE) != 0) { 1989219089Spjd ASSERT(missing == 0); 1990219089Spjd od->od_object = 0; 1991219089Spjd missing++; 1992219089Spjd } else { 1993219089Spjd od->od_object = lr->lr_foid; 1994219089Spjd od->od_type = od->od_crtype; 1995219089Spjd od->od_blocksize = od->od_crblocksize; 1996219089Spjd od->od_gen = od->od_crgen; 1997219089Spjd ASSERT(od->od_object != 0); 1998219089Spjd } 1999219089Spjd 2000219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2001219089Spjd } 2002219089Spjd 2003219089Spjd return (missing); 2004219089Spjd} 2005219089Spjd 2006219089Spjdstatic int 2007219089Spjdztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) 2008219089Spjd{ 2009219089Spjd int missing = 0; 2010219089Spjd int error; 2011219089Spjd 2012219089Spjd 
ASSERT(_mutex_held(&zd->zd_dirobj_lock)); 2013219089Spjd 2014219089Spjd od += count - 1; 2015219089Spjd 2016219089Spjd for (int i = count - 1; i >= 0; i--, od--) { 2017219089Spjd if (missing) { 2018219089Spjd missing++; 2019219089Spjd continue; 2020219089Spjd } 2021219089Spjd 2022243524Smm /* 2023243524Smm * No object was found. 2024243524Smm */ 2025219089Spjd if (od->od_object == 0) 2026219089Spjd continue; 2027219089Spjd 2028219089Spjd lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); 2029219089Spjd 2030219089Spjd lr->lr_doid = od->od_dir; 2031219089Spjd 2032219089Spjd if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { 2033219089Spjd ASSERT3U(error, ==, ENOSPC); 2034219089Spjd missing++; 2035219089Spjd } else { 2036219089Spjd od->od_object = 0; 2037219089Spjd } 2038219089Spjd ztest_lr_free(lr, sizeof (*lr), od->od_name); 2039219089Spjd } 2040219089Spjd 2041219089Spjd return (missing); 2042219089Spjd} 2043219089Spjd 2044219089Spjdstatic int 2045219089Spjdztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, 2046219089Spjd void *data) 2047219089Spjd{ 2048219089Spjd lr_write_t *lr; 2049219089Spjd int error; 2050219089Spjd 2051219089Spjd lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); 2052219089Spjd 2053219089Spjd lr->lr_foid = object; 2054219089Spjd lr->lr_offset = offset; 2055219089Spjd lr->lr_length = size; 2056219089Spjd lr->lr_blkoff = 0; 2057219089Spjd BP_ZERO(&lr->lr_blkptr); 2058219089Spjd 2059219089Spjd bcopy(data, lr + 1, size); 2060219089Spjd 2061219089Spjd error = ztest_replay_write(zd, lr, B_FALSE); 2062219089Spjd 2063219089Spjd ztest_lr_free(lr, sizeof (*lr) + size, NULL); 2064219089Spjd 2065219089Spjd return (error); 2066219089Spjd} 2067219089Spjd 2068219089Spjdstatic int 2069219089Spjdztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2070219089Spjd{ 2071219089Spjd lr_truncate_t *lr; 2072219089Spjd int error; 2073219089Spjd 2074219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 
2075219089Spjd 2076219089Spjd lr->lr_foid = object; 2077219089Spjd lr->lr_offset = offset; 2078219089Spjd lr->lr_length = size; 2079219089Spjd 2080219089Spjd error = ztest_replay_truncate(zd, lr, B_FALSE); 2081219089Spjd 2082219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2083219089Spjd 2084219089Spjd return (error); 2085219089Spjd} 2086219089Spjd 2087219089Spjdstatic int 2088219089Spjdztest_setattr(ztest_ds_t *zd, uint64_t object) 2089219089Spjd{ 2090219089Spjd lr_setattr_t *lr; 2091219089Spjd int error; 2092219089Spjd 2093219089Spjd lr = ztest_lr_alloc(sizeof (*lr), NULL); 2094219089Spjd 2095219089Spjd lr->lr_foid = object; 2096219089Spjd lr->lr_size = 0; 2097219089Spjd lr->lr_mode = 0; 2098219089Spjd 2099219089Spjd error = ztest_replay_setattr(zd, lr, B_FALSE); 2100219089Spjd 2101219089Spjd ztest_lr_free(lr, sizeof (*lr), NULL); 2102219089Spjd 2103219089Spjd return (error); 2104219089Spjd} 2105219089Spjd 2106219089Spjdstatic void 2107219089Spjdztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) 2108219089Spjd{ 2109219089Spjd objset_t *os = zd->zd_os; 2110219089Spjd dmu_tx_t *tx; 2111219089Spjd uint64_t txg; 2112219089Spjd rl_t *rl; 2113219089Spjd 2114219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 2115219089Spjd 2116219089Spjd ztest_object_lock(zd, object, RL_READER); 2117219089Spjd rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); 2118219089Spjd 2119219089Spjd tx = dmu_tx_create(os); 2120219089Spjd 2121219089Spjd dmu_tx_hold_write(tx, object, offset, size); 2122219089Spjd 2123219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 2124219089Spjd 2125219089Spjd if (txg != 0) { 2126219089Spjd dmu_prealloc(os, object, offset, size, tx); 2127219089Spjd dmu_tx_commit(tx); 2128219089Spjd txg_wait_synced(dmu_objset_pool(os), txg); 2129219089Spjd } else { 2130219089Spjd (void) dmu_free_long_range(os, object, offset, size); 2131219089Spjd } 2132219089Spjd 2133219089Spjd ztest_range_unlock(rl); 2134219089Spjd 
ztest_object_unlock(zd, object); 2135219089Spjd} 2136219089Spjd 2137219089Spjdstatic void 2138219089Spjdztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) 2139219089Spjd{ 2140243524Smm int err; 2141219089Spjd ztest_block_tag_t wbt; 2142219089Spjd dmu_object_info_t doi; 2143219089Spjd enum ztest_io_type io_type; 2144219089Spjd uint64_t blocksize; 2145219089Spjd void *data; 2146219089Spjd 2147219089Spjd VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); 2148219089Spjd blocksize = doi.doi_data_block_size; 2149219089Spjd data = umem_alloc(blocksize, UMEM_NOFAIL); 2150219089Spjd 2151219089Spjd /* 2152219089Spjd * Pick an i/o type at random, biased toward writing block tags. 2153219089Spjd */ 2154219089Spjd io_type = ztest_random(ZTEST_IO_TYPES); 2155219089Spjd if (ztest_random(2) == 0) 2156219089Spjd io_type = ZTEST_IO_WRITE_TAG; 2157219089Spjd 2158224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2159224526Smm 2160219089Spjd switch (io_type) { 2161219089Spjd 2162219089Spjd case ZTEST_IO_WRITE_TAG: 2163219089Spjd ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); 2164219089Spjd (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); 2165219089Spjd break; 2166219089Spjd 2167219089Spjd case ZTEST_IO_WRITE_PATTERN: 2168219089Spjd (void) memset(data, 'a' + (object + offset) % 5, blocksize); 2169219089Spjd if (ztest_random(2) == 0) { 2170219089Spjd /* 2171219089Spjd * Induce fletcher2 collisions to ensure that 2172219089Spjd * zio_ddt_collision() detects and resolves them 2173219089Spjd * when using fletcher2-verify for deduplication. 
2174219089Spjd */ 2175219089Spjd ((uint64_t *)data)[0] ^= 1ULL << 63; 2176219089Spjd ((uint64_t *)data)[4] ^= 1ULL << 63; 2177219089Spjd } 2178219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2179219089Spjd break; 2180219089Spjd 2181219089Spjd case ZTEST_IO_WRITE_ZEROES: 2182219089Spjd bzero(data, blocksize); 2183219089Spjd (void) ztest_write(zd, object, offset, blocksize, data); 2184219089Spjd break; 2185219089Spjd 2186219089Spjd case ZTEST_IO_TRUNCATE: 2187219089Spjd (void) ztest_truncate(zd, object, offset, blocksize); 2188219089Spjd break; 2189219089Spjd 2190219089Spjd case ZTEST_IO_SETATTR: 2191219089Spjd (void) ztest_setattr(zd, object); 2192219089Spjd break; 2193243524Smm 2194243524Smm case ZTEST_IO_REWRITE: 2195243524Smm (void) rw_rdlock(&ztest_name_lock); 2196243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2197243524Smm ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), 2198243524Smm B_FALSE); 2199243524Smm VERIFY(err == 0 || err == ENOSPC); 2200243524Smm err = ztest_dsl_prop_set_uint64(zd->zd_name, 2201243524Smm ZFS_PROP_COMPRESSION, 2202243524Smm ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), 2203243524Smm B_FALSE); 2204243524Smm VERIFY(err == 0 || err == ENOSPC); 2205243524Smm (void) rw_unlock(&ztest_name_lock); 2206243524Smm 2207243524Smm VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, 2208243524Smm DMU_READ_NO_PREFETCH)); 2209243524Smm 2210243524Smm (void) ztest_write(zd, object, offset, blocksize, data); 2211243524Smm break; 2212219089Spjd } 2213219089Spjd 2214224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2215224526Smm 2216219089Spjd umem_free(data, blocksize); 2217219089Spjd} 2218219089Spjd 2219219089Spjd/* 2220219089Spjd * Initialize an object description template. 
2221219089Spjd */ 2222219089Spjdstatic void 2223219089Spjdztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, 2224219089Spjd dmu_object_type_t type, uint64_t blocksize, uint64_t gen) 2225219089Spjd{ 2226219089Spjd od->od_dir = ZTEST_DIROBJ; 2227219089Spjd od->od_object = 0; 2228219089Spjd 2229219089Spjd od->od_crtype = type; 2230219089Spjd od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); 2231219089Spjd od->od_crgen = gen; 2232219089Spjd 2233219089Spjd od->od_type = DMU_OT_NONE; 2234219089Spjd od->od_blocksize = 0; 2235219089Spjd od->od_gen = 0; 2236219089Spjd 2237219089Spjd (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", 2238219089Spjd tag, (int64_t)id, index); 2239219089Spjd} 2240219089Spjd 2241219089Spjd/* 2242219089Spjd * Lookup or create the objects for a test using the od template. 2243219089Spjd * If the objects do not all exist, or if 'remove' is specified, 2244219089Spjd * remove any existing objects and create new ones. Otherwise, 2245219089Spjd * use the existing objects. 
2246219089Spjd */ 2247219089Spjdstatic int 2248219089Spjdztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) 2249219089Spjd{ 2250219089Spjd int count = size / sizeof (*od); 2251219089Spjd int rv = 0; 2252219089Spjd 2253219089Spjd VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); 2254219089Spjd if ((ztest_lookup(zd, od, count) != 0 || remove) && 2255219089Spjd (ztest_remove(zd, od, count) != 0 || 2256219089Spjd ztest_create(zd, od, count) != 0)) 2257219089Spjd rv = -1; 2258219089Spjd zd->zd_od = od; 2259219089Spjd VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2260219089Spjd 2261219089Spjd return (rv); 2262219089Spjd} 2263219089Spjd 2264219089Spjd/* ARGSUSED */ 2265219089Spjdvoid 2266219089Spjdztest_zil_commit(ztest_ds_t *zd, uint64_t id) 2267219089Spjd{ 2268219089Spjd zilog_t *zilog = zd->zd_zilog; 2269219089Spjd 2270224526Smm (void) rw_rdlock(&zd->zd_zilog_lock); 2271224526Smm 2272219089Spjd zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); 2273219089Spjd 2274219089Spjd /* 2275219089Spjd * Remember the committed values in zd, which is in parent/child 2276219089Spjd * shared memory. If we die, the next iteration of ztest_run() 2277219089Spjd * will verify that the log really does contain this record. 2278219089Spjd */ 2279219089Spjd mutex_enter(&zilog->zl_lock); 2280236143Smm ASSERT(zd->zd_shared != NULL); 2281236143Smm ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); 2282236143Smm zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; 2283219089Spjd mutex_exit(&zilog->zl_lock); 2284224526Smm 2285224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2286219089Spjd} 2287219089Spjd 2288219089Spjd/* 2289224526Smm * This function is designed to simulate the operations that occur during a 2290224526Smm * mount/unmount operation. We hold the dataset across these operations in an 2291224526Smm * attempt to expose any implicit assumptions about ZIL management. 
2292224526Smm */ 2293224526Smm/* ARGSUSED */ 2294224526Smmvoid 2295224526Smmztest_zil_remount(ztest_ds_t *zd, uint64_t id) 2296224526Smm{ 2297224526Smm objset_t *os = zd->zd_os; 2298224526Smm 2299243524Smm /* 2300243524Smm * We grab the zd_dirobj_lock to ensure that no other thread is 2301243524Smm * updating the zil (i.e. adding in-memory log records) and the 2302243524Smm * zd_zilog_lock to block any I/O. 2303243524Smm */ 2304243524Smm VERIFY0(mutex_lock(&zd->zd_dirobj_lock)); 2305224526Smm (void) rw_wrlock(&zd->zd_zilog_lock); 2306224526Smm 2307224526Smm /* zfsvfs_teardown() */ 2308224526Smm zil_close(zd->zd_zilog); 2309224526Smm 2310224526Smm /* zfsvfs_setup() */ 2311224526Smm VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); 2312224526Smm zil_replay(os, zd, ztest_replay_vector); 2313224526Smm 2314224526Smm (void) rw_unlock(&zd->zd_zilog_lock); 2315239620Smm VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); 2316224526Smm} 2317224526Smm 2318224526Smm/* 2319168404Spjd * Verify that we can't destroy an active pool, create an existing pool, 2320168404Spjd * or create a pool with a bad vdev spec. 2321168404Spjd */ 2322219089Spjd/* ARGSUSED */ 2323168404Spjdvoid 2324219089Spjdztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) 2325168404Spjd{ 2326236143Smm ztest_shared_opts_t *zo = &ztest_opts; 2327168404Spjd spa_t *spa; 2328168404Spjd nvlist_t *nvroot; 2329168404Spjd 2330168404Spjd /* 2331168404Spjd * Attempt to create using a bad file. 2332168404Spjd */ 2333243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2334219089Spjd VERIFY3U(ENOENT, ==, 2335248571Smm spa_create("ztest_bad_file", nvroot, NULL, NULL)); 2336168404Spjd nvlist_free(nvroot); 2337168404Spjd 2338168404Spjd /* 2339168404Spjd * Attempt to create using a bad mirror. 
2340168404Spjd */ 2341243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); 2342219089Spjd VERIFY3U(ENOENT, ==, 2343248571Smm spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); 2344168404Spjd nvlist_free(nvroot); 2345168404Spjd 2346168404Spjd /* 2347168404Spjd * Attempt to create an existing pool. It shouldn't matter 2348168404Spjd * what's in the nvroot; we should fail with EEXIST. 2349168404Spjd */ 2350236143Smm (void) rw_rdlock(&ztest_name_lock); 2351243505Smm nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); 2352248571Smm VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); 2353168404Spjd nvlist_free(nvroot); 2354236143Smm VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); 2355236143Smm VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); 2356219089Spjd spa_close(spa, FTAG); 2357168404Spjd 2358236143Smm (void) rw_unlock(&ztest_name_lock); 2359168404Spjd} 2360168404Spjd 2361243505Smm/* ARGSUSED */ 2362243505Smmvoid 2363243505Smmztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) 2364243505Smm{ 2365243505Smm spa_t *spa; 2366243505Smm uint64_t initial_version = SPA_VERSION_INITIAL; 2367243505Smm uint64_t version, newversion; 2368243505Smm nvlist_t *nvroot, *props; 2369243505Smm char *name; 2370243505Smm 2371243505Smm VERIFY0(mutex_lock(&ztest_vdev_lock)); 2372243505Smm name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); 2373243505Smm 2374243505Smm /* 2375243505Smm * Clean up from previous runs. 2376243505Smm */ 2377243505Smm (void) spa_destroy(name); 2378243505Smm 2379243505Smm nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, 2380243505Smm 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); 2381243505Smm 2382243505Smm /* 2383243505Smm * If we're configuring a RAIDZ device then make sure that the 2384243505Smm * the initial version is capable of supporting that feature. 
2385243505Smm */ 2386243505Smm switch (ztest_opts.zo_raidz_parity) { 2387243505Smm case 0: 2388243505Smm case 1: 2389243505Smm initial_version = SPA_VERSION_INITIAL; 2390243505Smm break; 2391243505Smm case 2: 2392243505Smm initial_version = SPA_VERSION_RAIDZ2; 2393243505Smm break; 2394243505Smm case 3: 2395243505Smm initial_version = SPA_VERSION_RAIDZ3; 2396243505Smm break; 2397243505Smm } 2398243505Smm 2399243505Smm /* 2400243505Smm * Create a pool with a spa version that can be upgraded. Pick 2401243505Smm * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. 2402243505Smm */ 2403243505Smm do { 2404243505Smm version = ztest_random_spa_version(initial_version); 2405243505Smm } while (version > SPA_VERSION_BEFORE_FEATURES); 2406243505Smm 2407243505Smm props = fnvlist_alloc(); 2408243505Smm fnvlist_add_uint64(props, 2409243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION), version); 2410248571Smm VERIFY0(spa_create(name, nvroot, props, NULL)); 2411243505Smm fnvlist_free(nvroot); 2412243505Smm fnvlist_free(props); 2413243505Smm 2414243505Smm VERIFY0(spa_open(name, &spa, FTAG)); 2415243505Smm VERIFY3U(spa_version(spa), ==, version); 2416243505Smm newversion = ztest_random_spa_version(version + 1); 2417243505Smm 2418243505Smm if (ztest_opts.zo_verbose >= 4) { 2419243505Smm (void) printf("upgrading spa version from %llu to %llu\n", 2420243505Smm (u_longlong_t)version, (u_longlong_t)newversion); 2421243505Smm } 2422243505Smm 2423243505Smm spa_upgrade(spa, newversion); 2424243505Smm VERIFY3U(spa_version(spa), >, version); 2425243505Smm VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, 2426243505Smm zpool_prop_to_name(ZPOOL_PROP_VERSION))); 2427243505Smm spa_close(spa, FTAG); 2428243505Smm 2429243505Smm strfree(name); 2430243505Smm VERIFY0(mutex_unlock(&ztest_vdev_lock)); 2431243505Smm} 2432243505Smm 2433185029Spjdstatic vdev_t * 2434185029Spjdvdev_lookup_by_path(vdev_t *vd, const char *path) 2435185029Spjd{ 2436185029Spjd vdev_t *mvd; 
2437185029Spjd 2438185029Spjd if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) 2439185029Spjd return (vd); 2440185029Spjd 2441185029Spjd for (int c = 0; c < vd->vdev_children; c++) 2442185029Spjd if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != 2443185029Spjd NULL) 2444185029Spjd return (mvd); 2445185029Spjd 2446185029Spjd return (NULL); 2447185029Spjd} 2448185029Spjd 2449168404Spjd/* 2450219089Spjd * Find the first available hole which can be used as a top-level. 2451219089Spjd */ 2452219089Spjdint 2453219089Spjdfind_vdev_hole(spa_t *spa) 2454219089Spjd{ 2455219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2456219089Spjd int c; 2457219089Spjd 2458219089Spjd ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); 2459219089Spjd 2460219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 2461219089Spjd vdev_t *cvd = rvd->vdev_child[c]; 2462219089Spjd 2463219089Spjd if (cvd->vdev_ishole) 2464219089Spjd break; 2465219089Spjd } 2466219089Spjd return (c); 2467219089Spjd} 2468219089Spjd 2469219089Spjd/* 2470168404Spjd * Verify that vdev_add() works as expected. 2471168404Spjd */ 2472219089Spjd/* ARGSUSED */ 2473168404Spjdvoid 2474219089Spjdztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) 2475168404Spjd{ 2476219089Spjd ztest_shared_t *zs = ztest_shared; 2477236143Smm spa_t *spa = ztest_spa; 2478219089Spjd uint64_t leaves; 2479219089Spjd uint64_t guid; 2480168404Spjd nvlist_t *nvroot; 2481168404Spjd int error; 2482168404Spjd 2483236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2484248571Smm leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; 2485168404Spjd 2486185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2487168404Spjd 2488219089Spjd ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; 2489168404Spjd 2490185029Spjd /* 2491219089Spjd * If we have slogs then remove them 1/4 of the time. 
2492185029Spjd */ 2493219089Spjd if (spa_has_slogs(spa) && ztest_random(4) == 0) { 2494219089Spjd /* 2495219089Spjd * Grab the guid from the head of the log class rotor. 2496219089Spjd */ 2497219089Spjd guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; 2498185029Spjd 2499219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2500168404Spjd 2501219089Spjd /* 2502219089Spjd * We have to grab the zs_name_lock as writer to 2503219089Spjd * prevent a race between removing a slog (dmu_objset_find) 2504219089Spjd * and destroying a dataset. Removing the slog will 2505219089Spjd * grab a reference on the dataset which may cause 2506219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 2507219089Spjd * leaving the dataset in an inconsistent state. 2508219089Spjd */ 2509236143Smm VERIFY(rw_wrlock(&ztest_name_lock) == 0); 2510219089Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2511236143Smm VERIFY(rw_unlock(&ztest_name_lock) == 0); 2512168404Spjd 2513219089Spjd if (error && error != EEXIST) 2514219089Spjd fatal(0, "spa_vdev_remove() = %d", error); 2515219089Spjd } else { 2516219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2517219089Spjd 2518219089Spjd /* 2519219089Spjd * Make 1/4 of the devices be log devices. 2520219089Spjd */ 2521243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, 2522236143Smm ztest_opts.zo_vdev_size, 0, 2523236143Smm ztest_random(4) == 0, ztest_opts.zo_raidz, 2524236143Smm zs->zs_mirrors, 1); 2525219089Spjd 2526219089Spjd error = spa_vdev_add(spa, nvroot); 2527219089Spjd nvlist_free(nvroot); 2528219089Spjd 2529219089Spjd if (error == ENOSPC) 2530219089Spjd ztest_record_enospc("spa_vdev_add"); 2531219089Spjd else if (error != 0) 2532219089Spjd fatal(0, "spa_vdev_add() = %d", error); 2533219089Spjd } 2534219089Spjd 2535236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2536168404Spjd} 2537168404Spjd 2538185029Spjd/* 2539185029Spjd * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
2540185029Spjd */ 2541219089Spjd/* ARGSUSED */ 2542185029Spjdvoid 2543219089Spjdztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) 2544168404Spjd{ 2545219089Spjd ztest_shared_t *zs = ztest_shared; 2546236143Smm spa_t *spa = ztest_spa; 2547185029Spjd vdev_t *rvd = spa->spa_root_vdev; 2548185029Spjd spa_aux_vdev_t *sav; 2549185029Spjd char *aux; 2550185029Spjd uint64_t guid = 0; 2551185029Spjd int error; 2552168404Spjd 2553185029Spjd if (ztest_random(2) == 0) { 2554185029Spjd sav = &spa->spa_spares; 2555185029Spjd aux = ZPOOL_CONFIG_SPARES; 2556185029Spjd } else { 2557185029Spjd sav = &spa->spa_l2cache; 2558185029Spjd aux = ZPOOL_CONFIG_L2CACHE; 2559185029Spjd } 2560185029Spjd 2561236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2562185029Spjd 2563185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2564185029Spjd 2565185029Spjd if (sav->sav_count != 0 && ztest_random(4) == 0) { 2566185029Spjd /* 2567185029Spjd * Pick a random device to remove. 2568185029Spjd */ 2569185029Spjd guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; 2570185029Spjd } else { 2571185029Spjd /* 2572185029Spjd * Find an unused device we can add. 2573185029Spjd */ 2574219089Spjd zs->zs_vdev_aux = 0; 2575185029Spjd for (;;) { 2576185029Spjd char path[MAXPATHLEN]; 2577185029Spjd int c; 2578236143Smm (void) snprintf(path, sizeof (path), ztest_aux_template, 2579236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, aux, 2580236143Smm zs->zs_vdev_aux); 2581185029Spjd for (c = 0; c < sav->sav_count; c++) 2582185029Spjd if (strcmp(sav->sav_vdevs[c]->vdev_path, 2583185029Spjd path) == 0) 2584185029Spjd break; 2585185029Spjd if (c == sav->sav_count && 2586185029Spjd vdev_lookup_by_path(rvd, path) == NULL) 2587185029Spjd break; 2588219089Spjd zs->zs_vdev_aux++; 2589168404Spjd } 2590168404Spjd } 2591168404Spjd 2592185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2593168404Spjd 2594185029Spjd if (guid == 0) { 2595185029Spjd /* 2596185029Spjd * Add a new device. 
2597185029Spjd */ 2598243505Smm nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, 2599236143Smm (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); 2600185029Spjd error = spa_vdev_add(spa, nvroot); 2601185029Spjd if (error != 0) 2602185029Spjd fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); 2603185029Spjd nvlist_free(nvroot); 2604185029Spjd } else { 2605185029Spjd /* 2606185029Spjd * Remove an existing device. Sometimes, dirty its 2607185029Spjd * vdev state first to make sure we handle removal 2608185029Spjd * of devices that have pending state changes. 2609185029Spjd */ 2610185029Spjd if (ztest_random(2) == 0) 2611219089Spjd (void) vdev_online(spa, guid, 0, NULL); 2612185029Spjd 2613185029Spjd error = spa_vdev_remove(spa, guid, B_FALSE); 2614185029Spjd if (error != 0 && error != EBUSY) 2615185029Spjd fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); 2616185029Spjd } 2617185029Spjd 2618236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2619168404Spjd} 2620168404Spjd 2621168404Spjd/* 2622219089Spjd * split a pool if it has mirror tlvdevs 2623219089Spjd */ 2624219089Spjd/* ARGSUSED */ 2625219089Spjdvoid 2626219089Spjdztest_split_pool(ztest_ds_t *zd, uint64_t id) 2627219089Spjd{ 2628219089Spjd ztest_shared_t *zs = ztest_shared; 2629236143Smm spa_t *spa = ztest_spa; 2630219089Spjd vdev_t *rvd = spa->spa_root_vdev; 2631219089Spjd nvlist_t *tree, **child, *config, *split, **schild; 2632219089Spjd uint_t c, children, schildren = 0, lastlogid = 0; 2633219089Spjd int error = 0; 2634219089Spjd 2635236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2636219089Spjd 2637219089Spjd /* ensure we have a useable config; mirrors of raidz aren't supported */ 2638236143Smm if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { 2639236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2640219089Spjd return; 2641219089Spjd } 2642219089Spjd 2643219089Spjd /* clean up the old pool, if any */ 2644219089Spjd (void) spa_destroy("splitp"); 2645219089Spjd 2646219089Spjd 
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2647219089Spjd 2648219089Spjd /* generate a config from the existing config */ 2649219089Spjd mutex_enter(&spa->spa_props_lock); 2650219089Spjd VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, 2651219089Spjd &tree) == 0); 2652219089Spjd mutex_exit(&spa->spa_props_lock); 2653219089Spjd 2654219089Spjd VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, 2655219089Spjd &children) == 0); 2656219089Spjd 2657219089Spjd schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); 2658219089Spjd for (c = 0; c < children; c++) { 2659219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 2660219089Spjd nvlist_t **mchild; 2661219089Spjd uint_t mchildren; 2662219089Spjd 2663219089Spjd if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { 2664219089Spjd VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 2665219089Spjd 0) == 0); 2666219089Spjd VERIFY(nvlist_add_string(schild[schildren], 2667219089Spjd ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); 2668219089Spjd VERIFY(nvlist_add_uint64(schild[schildren], 2669219089Spjd ZPOOL_CONFIG_IS_HOLE, 1) == 0); 2670219089Spjd if (lastlogid == 0) 2671219089Spjd lastlogid = schildren; 2672219089Spjd ++schildren; 2673219089Spjd continue; 2674219089Spjd } 2675219089Spjd lastlogid = 0; 2676219089Spjd VERIFY(nvlist_lookup_nvlist_array(child[c], 2677219089Spjd ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); 2678219089Spjd VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); 2679219089Spjd } 2680219089Spjd 2681219089Spjd /* OK, create a config that can be used to split */ 2682219089Spjd VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); 2683219089Spjd VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE, 2684219089Spjd VDEV_TYPE_ROOT) == 0); 2685219089Spjd VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, 2686219089Spjd lastlogid != 0 ? 
lastlogid : schildren) == 0); 2687219089Spjd 2688219089Spjd VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 2689219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); 2690219089Spjd 2691219089Spjd for (c = 0; c < schildren; c++) 2692219089Spjd nvlist_free(schild[c]); 2693219089Spjd free(schild); 2694219089Spjd nvlist_free(split); 2695219089Spjd 2696219089Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2697219089Spjd 2698236143Smm (void) rw_wrlock(&ztest_name_lock); 2699219089Spjd error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); 2700236143Smm (void) rw_unlock(&ztest_name_lock); 2701219089Spjd 2702219089Spjd nvlist_free(config); 2703219089Spjd 2704219089Spjd if (error == 0) { 2705219089Spjd (void) printf("successful split - results:\n"); 2706219089Spjd mutex_enter(&spa_namespace_lock); 2707219089Spjd show_pool_stats(spa); 2708219089Spjd show_pool_stats(spa_lookup("splitp")); 2709219089Spjd mutex_exit(&spa_namespace_lock); 2710219089Spjd ++zs->zs_splits; 2711219089Spjd --zs->zs_mirrors; 2712219089Spjd } 2713236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2714219089Spjd 2715219089Spjd} 2716219089Spjd 2717219089Spjd/* 2718168404Spjd * Verify that we can attach and detach devices. 
2719168404Spjd */ 2720219089Spjd/* ARGSUSED */ 2721168404Spjdvoid 2722219089Spjdztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) 2723168404Spjd{ 2724219089Spjd ztest_shared_t *zs = ztest_shared; 2725236143Smm spa_t *spa = ztest_spa; 2726185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 2727168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2728168404Spjd vdev_t *oldvd, *newvd, *pvd; 2729185029Spjd nvlist_t *root; 2730219089Spjd uint64_t leaves; 2731168404Spjd uint64_t leaf, top; 2732168404Spjd uint64_t ashift = ztest_get_ashift(); 2733209962Smm uint64_t oldguid, pguid; 2734168404Spjd size_t oldsize, newsize; 2735168404Spjd char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; 2736168404Spjd int replacing; 2737185029Spjd int oldvd_has_siblings = B_FALSE; 2738185029Spjd int newvd_is_spare = B_FALSE; 2739185029Spjd int oldvd_is_log; 2740168404Spjd int error, expected_error; 2741168404Spjd 2742236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 2743236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 2744168404Spjd 2745185029Spjd spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 2746168404Spjd 2747168404Spjd /* 2748168404Spjd * Decide whether to do an attach or a replace. 2749168404Spjd */ 2750168404Spjd replacing = ztest_random(2); 2751168404Spjd 2752168404Spjd /* 2753168404Spjd * Pick a random top-level vdev. 2754168404Spjd */ 2755219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 2756168404Spjd 2757168404Spjd /* 2758168404Spjd * Pick a random leaf within it. 2759168404Spjd */ 2760168404Spjd leaf = ztest_random(leaves); 2761168404Spjd 2762168404Spjd /* 2763185029Spjd * Locate this vdev. 
2764168404Spjd */ 2765185029Spjd oldvd = rvd->vdev_child[top]; 2766219089Spjd if (zs->zs_mirrors >= 1) { 2767209962Smm ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); 2768219089Spjd ASSERT(oldvd->vdev_children >= zs->zs_mirrors); 2769236143Smm oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; 2770209962Smm } 2771236143Smm if (ztest_opts.zo_raidz > 1) { 2772209962Smm ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); 2773236143Smm ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); 2774236143Smm oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; 2775209962Smm } 2776168404Spjd 2777168404Spjd /* 2778185029Spjd * If we're already doing an attach or replace, oldvd may be a 2779185029Spjd * mirror vdev -- in which case, pick a random child. 2780168404Spjd */ 2781185029Spjd while (oldvd->vdev_children != 0) { 2782185029Spjd oldvd_has_siblings = B_TRUE; 2783209962Smm ASSERT(oldvd->vdev_children >= 2); 2784209962Smm oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; 2785185029Spjd } 2786168404Spjd 2787185029Spjd oldguid = oldvd->vdev_guid; 2788219089Spjd oldsize = vdev_get_min_asize(oldvd); 2789185029Spjd oldvd_is_log = oldvd->vdev_top->vdev_islog; 2790185029Spjd (void) strcpy(oldpath, oldvd->vdev_path); 2791185029Spjd pvd = oldvd->vdev_parent; 2792209962Smm pguid = pvd->vdev_guid; 2793185029Spjd 2794168404Spjd /* 2795185029Spjd * If oldvd has siblings, then half of the time, detach it. 
2796168404Spjd */ 2797185029Spjd if (oldvd_has_siblings && ztest_random(2) == 0) { 2798185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2799209962Smm error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); 2800209962Smm if (error != 0 && error != ENODEV && error != EBUSY && 2801209962Smm error != ENOTSUP) 2802209962Smm fatal(0, "detach (%s) returned %d", oldpath, error); 2803236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2804185029Spjd return; 2805185029Spjd } 2806168404Spjd 2807168404Spjd /* 2808185029Spjd * For the new vdev, choose with equal probability between the two 2809185029Spjd * standard paths (ending in either 'a' or 'b') or a random hot spare. 2810168404Spjd */ 2811185029Spjd if (sav->sav_count != 0 && ztest_random(3) == 0) { 2812185029Spjd newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; 2813185029Spjd newvd_is_spare = B_TRUE; 2814185029Spjd (void) strcpy(newpath, newvd->vdev_path); 2815185029Spjd } else { 2816185029Spjd (void) snprintf(newpath, sizeof (newpath), ztest_dev_template, 2817236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 2818236143Smm top * leaves + leaf); 2819185029Spjd if (ztest_random(2) == 0) 2820185029Spjd newpath[strlen(newpath) - 1] = 'b'; 2821185029Spjd newvd = vdev_lookup_by_path(rvd, newpath); 2822185029Spjd } 2823168404Spjd 2824185029Spjd if (newvd) { 2825219089Spjd newsize = vdev_get_min_asize(newvd); 2826185029Spjd } else { 2827185029Spjd /* 2828185029Spjd * Make newsize a little bigger or smaller than oldsize. 2829185029Spjd * If it's smaller, the attach should fail. 2830185029Spjd * If it's larger, and we're doing a replace, 2831185029Spjd * we should get dynamic LUN growth when we're done. 2832185029Spjd */ 2833185029Spjd newsize = 10 * oldsize / (9 + ztest_random(3)); 2834185029Spjd } 2835185029Spjd 2836168404Spjd /* 2837168404Spjd * If pvd is not a mirror or root, the attach should fail with ENOTSUP, 2838168404Spjd * unless it's a replace; in that case any non-replacing parent is OK. 
2839168404Spjd * 2840168404Spjd * If newvd is already part of the pool, it should fail with EBUSY. 2841168404Spjd * 2842168404Spjd * If newvd is too small, it should fail with EOVERFLOW. 2843168404Spjd */ 2844185029Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2845185029Spjd pvd->vdev_ops != &vdev_root_ops && (!replacing || 2846185029Spjd pvd->vdev_ops == &vdev_replacing_ops || 2847185029Spjd pvd->vdev_ops == &vdev_spare_ops)) 2848185029Spjd expected_error = ENOTSUP; 2849185029Spjd else if (newvd_is_spare && (!replacing || oldvd_is_log)) 2850185029Spjd expected_error = ENOTSUP; 2851185029Spjd else if (newvd == oldvd) 2852185029Spjd expected_error = replacing ? 0 : EBUSY; 2853185029Spjd else if (vdev_lookup_by_path(rvd, newpath) != NULL) 2854168404Spjd expected_error = EBUSY; 2855168404Spjd else if (newsize < oldsize) 2856168404Spjd expected_error = EOVERFLOW; 2857168404Spjd else if (ashift > oldvd->vdev_top->vdev_ashift) 2858168404Spjd expected_error = EDOM; 2859168404Spjd else 2860168404Spjd expected_error = 0; 2861168404Spjd 2862185029Spjd spa_config_exit(spa, SCL_VDEV, FTAG); 2863168404Spjd 2864168404Spjd /* 2865168404Spjd * Build the nvlist describing newpath. 2866168404Spjd */ 2867243505Smm root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, 2868185029Spjd ashift, 0, 0, 0, 1); 2869168404Spjd 2870185029Spjd error = spa_vdev_attach(spa, oldguid, root, replacing); 2871168404Spjd 2872168404Spjd nvlist_free(root); 2873168404Spjd 2874168404Spjd /* 2875168404Spjd * If our parent was the replacing vdev, but the replace completed, 2876168404Spjd * then instead of failing with ENOTSUP we may either succeed, 2877168404Spjd * fail with ENODEV, or fail with EOVERFLOW. 2878168404Spjd */ 2879168404Spjd if (expected_error == ENOTSUP && 2880168404Spjd (error == 0 || error == ENODEV || error == EOVERFLOW)) 2881168404Spjd expected_error = error; 2882168404Spjd 2883168404Spjd /* 2884168404Spjd * If someone grew the LUN, the replacement may be too small. 
2885168404Spjd */ 2886185029Spjd if (error == EOVERFLOW || error == EBUSY) 2887168404Spjd expected_error = error; 2888168404Spjd 2889185029Spjd /* XXX workaround 6690467 */ 2890185029Spjd if (error != expected_error && expected_error != EBUSY) { 2891185029Spjd fatal(0, "attach (%s %llu, %s %llu, %d) " 2892185029Spjd "returned %d, expected %d", 2893185029Spjd oldpath, (longlong_t)oldsize, newpath, 2894185029Spjd (longlong_t)newsize, replacing, error, expected_error); 2895168404Spjd } 2896168404Spjd 2897236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 2898168404Spjd} 2899168404Spjd 2900168404Spjd/* 2901219089Spjd * Callback function which expands the physical size of the vdev. 2902168404Spjd */ 2903219089Spjdvdev_t * 2904219089Spjdgrow_vdev(vdev_t *vd, void *arg) 2905168404Spjd{ 2906219089Spjd spa_t *spa = vd->vdev_spa; 2907219089Spjd size_t *newsize = arg; 2908168404Spjd size_t fsize; 2909168404Spjd int fd; 2910168404Spjd 2911219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2912219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2913168404Spjd 2914219089Spjd if ((fd = open(vd->vdev_path, O_RDWR)) == -1) 2915219089Spjd return (vd); 2916219089Spjd 2917219089Spjd fsize = lseek(fd, 0, SEEK_END); 2918219089Spjd (void) ftruncate(fd, *newsize); 2919219089Spjd 2920236143Smm if (ztest_opts.zo_verbose >= 6) { 2921219089Spjd (void) printf("%s grew from %lu to %lu bytes\n", 2922219089Spjd vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); 2923219089Spjd } 2924219089Spjd (void) close(fd); 2925219089Spjd return (NULL); 2926219089Spjd} 2927219089Spjd 2928219089Spjd/* 2929219089Spjd * Callback function which expands a given vdev by calling vdev_online(). 
2930219089Spjd */ 2931219089Spjd/* ARGSUSED */ 2932219089Spjdvdev_t * 2933219089Spjdonline_vdev(vdev_t *vd, void *arg) 2934219089Spjd{ 2935219089Spjd spa_t *spa = vd->vdev_spa; 2936219089Spjd vdev_t *tvd = vd->vdev_top; 2937219089Spjd uint64_t guid = vd->vdev_guid; 2938219089Spjd uint64_t generation = spa->spa_config_generation + 1; 2939219089Spjd vdev_state_t newstate = VDEV_STATE_UNKNOWN; 2940219089Spjd int error; 2941219089Spjd 2942219089Spjd ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); 2943219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 2944219089Spjd 2945219089Spjd /* Calling vdev_online will initialize the new metaslabs */ 2946219089Spjd spa_config_exit(spa, SCL_STATE, spa); 2947219089Spjd error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); 2948219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 2949219089Spjd 2950168404Spjd /* 2951219089Spjd * If vdev_online returned an error or the underlying vdev_open 2952219089Spjd * failed then we abort the expand. The only way to know that 2953219089Spjd * vdev_open fails is by checking the returned newstate. 2954168404Spjd */ 2955219089Spjd if (error || newstate != VDEV_STATE_HEALTHY) { 2956236143Smm if (ztest_opts.zo_verbose >= 5) { 2957219089Spjd (void) printf("Unable to expand vdev, state %llu, " 2958219089Spjd "error %d\n", (u_longlong_t)newstate, error); 2959219089Spjd } 2960219089Spjd return (vd); 2961219089Spjd } 2962219089Spjd ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); 2963168404Spjd 2964219089Spjd /* 2965219089Spjd * Since we dropped the lock we need to ensure that we're 2966219089Spjd * still talking to the original vdev. It's possible this 2967219089Spjd * vdev may have been detached/replaced while we were 2968219089Spjd * trying to online it. 
2969219089Spjd */ 2970219089Spjd if (generation != spa->spa_config_generation) { 2971236143Smm if (ztest_opts.zo_verbose >= 5) { 2972219089Spjd (void) printf("vdev configuration has changed, " 2973219089Spjd "guid %llu, state %llu, expected gen %llu, " 2974219089Spjd "got gen %llu\n", 2975219089Spjd (u_longlong_t)guid, 2976219089Spjd (u_longlong_t)tvd->vdev_state, 2977219089Spjd (u_longlong_t)generation, 2978219089Spjd (u_longlong_t)spa->spa_config_generation); 2979219089Spjd } 2980219089Spjd return (vd); 2981219089Spjd } 2982219089Spjd return (NULL); 2983219089Spjd} 2984168404Spjd 2985219089Spjd/* 2986219089Spjd * Traverse the vdev tree calling the supplied function. 2987219089Spjd * We continue to walk the tree until we either have walked all 2988219089Spjd * children or we receive a non-NULL return from the callback. 2989219089Spjd * If a NULL callback is passed, then we just return back the first 2990219089Spjd * leaf vdev we encounter. 2991219089Spjd */ 2992219089Spjdvdev_t * 2993219089Spjdvdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) 2994219089Spjd{ 2995219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 2996219089Spjd if (func == NULL) 2997219089Spjd return (vd); 2998219089Spjd else 2999219089Spjd return (func(vd, arg)); 3000219089Spjd } 3001168404Spjd 3002219089Spjd for (uint_t c = 0; c < vd->vdev_children; c++) { 3003219089Spjd vdev_t *cvd = vd->vdev_child[c]; 3004219089Spjd if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) 3005219089Spjd return (cvd); 3006219089Spjd } 3007219089Spjd return (NULL); 3008219089Spjd} 3009219089Spjd 3010219089Spjd/* 3011219089Spjd * Verify that dynamic LUN growth works as expected. 
3012219089Spjd */ 3013219089Spjd/* ARGSUSED */ 3014219089Spjdvoid 3015219089Spjdztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) 3016219089Spjd{ 3017236143Smm spa_t *spa = ztest_spa; 3018219089Spjd vdev_t *vd, *tvd; 3019219089Spjd metaslab_class_t *mc; 3020219089Spjd metaslab_group_t *mg; 3021219089Spjd size_t psize, newsize; 3022219089Spjd uint64_t top; 3023219089Spjd uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; 3024219089Spjd 3025236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 3026219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3027219089Spjd 3028219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 3029219089Spjd 3030219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3031219089Spjd mg = tvd->vdev_mg; 3032219089Spjd mc = mg->mg_class; 3033219089Spjd old_ms_count = tvd->vdev_ms_count; 3034219089Spjd old_class_space = metaslab_class_get_space(mc); 3035219089Spjd 3036219089Spjd /* 3037219089Spjd * Determine the size of the first leaf vdev associated with 3038219089Spjd * our top-level device. 3039219089Spjd */ 3040219089Spjd vd = vdev_walk_tree(tvd, NULL, NULL); 3041219089Spjd ASSERT3P(vd, !=, NULL); 3042219089Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 3043219089Spjd 3044219089Spjd psize = vd->vdev_psize; 3045219089Spjd 3046219089Spjd /* 3047219089Spjd * We only try to expand the vdev if it's healthy, less than 4x its 3048219089Spjd * original size, and it has a valid psize. 
3049219089Spjd */ 3050219089Spjd if (tvd->vdev_state != VDEV_STATE_HEALTHY || 3051236143Smm psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { 3052219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3053236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3054219089Spjd return; 3055219089Spjd } 3056219089Spjd ASSERT(psize > 0); 3057219089Spjd newsize = psize + psize / 8; 3058219089Spjd ASSERT3U(newsize, >, psize); 3059219089Spjd 3060236143Smm if (ztest_opts.zo_verbose >= 6) { 3061219089Spjd (void) printf("Expanding LUN %s from %lu to %lu\n", 3062219089Spjd vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); 3063219089Spjd } 3064219089Spjd 3065219089Spjd /* 3066219089Spjd * Growing the vdev is a two step process: 3067219089Spjd * 1). expand the physical size (i.e. relabel) 3068219089Spjd * 2). online the vdev to create the new metaslabs 3069219089Spjd */ 3070219089Spjd if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || 3071219089Spjd vdev_walk_tree(tvd, online_vdev, NULL) != NULL || 3072219089Spjd tvd->vdev_state != VDEV_STATE_HEALTHY) { 3073236143Smm if (ztest_opts.zo_verbose >= 5) { 3074219089Spjd (void) printf("Could not expand LUN because " 3075219089Spjd "the vdev configuration changed.\n"); 3076168404Spjd } 3077219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3078236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3079219089Spjd return; 3080168404Spjd } 3081168404Spjd 3082219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3083219089Spjd 3084219089Spjd /* 3085219089Spjd * Expanding the LUN will update the config asynchronously, 3086219089Spjd * thus we must wait for the async thread to complete any 3087219089Spjd * pending tasks before proceeding. 
3088219089Spjd */ 3089219089Spjd for (;;) { 3090219089Spjd boolean_t done; 3091219089Spjd mutex_enter(&spa->spa_async_lock); 3092219089Spjd done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); 3093219089Spjd mutex_exit(&spa->spa_async_lock); 3094219089Spjd if (done) 3095219089Spjd break; 3096219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 3097219089Spjd (void) poll(NULL, 0, 100); 3098219089Spjd } 3099219089Spjd 3100219089Spjd spa_config_enter(spa, SCL_STATE, spa, RW_READER); 3101219089Spjd 3102219089Spjd tvd = spa->spa_root_vdev->vdev_child[top]; 3103219089Spjd new_ms_count = tvd->vdev_ms_count; 3104219089Spjd new_class_space = metaslab_class_get_space(mc); 3105219089Spjd 3106219089Spjd if (tvd->vdev_mg != mg || mg->mg_class != mc) { 3107236143Smm if (ztest_opts.zo_verbose >= 5) { 3108219089Spjd (void) printf("Could not verify LUN expansion due to " 3109219089Spjd "intervening vdev offline or remove.\n"); 3110219089Spjd } 3111219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3112236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3113219089Spjd return; 3114219089Spjd } 3115219089Spjd 3116219089Spjd /* 3117219089Spjd * Make sure we were able to grow the vdev. 3118219089Spjd */ 3119219089Spjd if (new_ms_count <= old_ms_count) 3120219089Spjd fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", 3121219089Spjd old_ms_count, new_ms_count); 3122219089Spjd 3123219089Spjd /* 3124219089Spjd * Make sure we were able to grow the pool. 
3125219089Spjd */ 3126219089Spjd if (new_class_space <= old_class_space) 3127219089Spjd fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", 3128219089Spjd old_class_space, new_class_space); 3129219089Spjd 3130236143Smm if (ztest_opts.zo_verbose >= 5) { 3131219089Spjd char oldnumbuf[6], newnumbuf[6]; 3132219089Spjd 3133219089Spjd nicenum(old_class_space, oldnumbuf); 3134219089Spjd nicenum(new_class_space, newnumbuf); 3135219089Spjd (void) printf("%s grew from %s to %s\n", 3136219089Spjd spa->spa_name, oldnumbuf, newnumbuf); 3137219089Spjd } 3138219089Spjd 3139219089Spjd spa_config_exit(spa, SCL_STATE, spa); 3140236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 3141168404Spjd} 3142168404Spjd 3143219089Spjd/* 3144219089Spjd * Verify that dmu_objset_{create,destroy,open,close} work as expected. 3145219089Spjd */ 3146168404Spjd/* ARGSUSED */ 3147168404Spjdstatic void 3148219089Spjdztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 3149168404Spjd{ 3150168404Spjd /* 3151219089Spjd * Create the objects common to all ztest datasets. 
3152168404Spjd */ 3153219089Spjd VERIFY(zap_create_claim(os, ZTEST_DIROBJ, 3154168404Spjd DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); 3155219089Spjd} 3156168404Spjd 3157219089Spjdstatic int 3158219089Spjdztest_dataset_create(char *dsname) 3159219089Spjd{ 3160219089Spjd uint64_t zilset = ztest_random(100); 3161219089Spjd int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, 3162219089Spjd ztest_objset_create_cb, NULL); 3163219089Spjd 3164219089Spjd if (err || zilset < 80) 3165219089Spjd return (err); 3166219089Spjd 3167236143Smm if (ztest_opts.zo_verbose >= 6) 3168236143Smm (void) printf("Setting dataset %s to sync always\n", dsname); 3169219089Spjd return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, 3170219089Spjd ZFS_SYNC_ALWAYS, B_FALSE)); 3171168404Spjd} 3172168404Spjd 3173219089Spjd/* ARGSUSED */ 3174168404Spjdstatic int 3175219089Spjdztest_objset_destroy_cb(const char *name, void *arg) 3176168404Spjd{ 3177168404Spjd objset_t *os; 3178219089Spjd dmu_object_info_t doi; 3179168404Spjd int error; 3180168404Spjd 3181168404Spjd /* 3182168404Spjd * Verify that the dataset contains a directory object. 3183168404Spjd */ 3184248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); 3185219089Spjd error = dmu_object_info(os, ZTEST_DIROBJ, &doi); 3186168404Spjd if (error != ENOENT) { 3187168404Spjd /* We could have crashed in the middle of destroying it */ 3188240415Smm ASSERT0(error); 3189219089Spjd ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); 3190219089Spjd ASSERT3S(doi.doi_physical_blocks_512, >=, 0); 3191168404Spjd } 3192248571Smm dmu_objset_disown(os, FTAG); 3193168404Spjd 3194168404Spjd /* 3195168404Spjd * Destroy the dataset. 
3196168404Spjd */ 3197248571Smm if (strchr(name, '@') != NULL) { 3198248571Smm VERIFY0(dsl_destroy_snapshot(name, B_FALSE)); 3199248571Smm } else { 3200248571Smm VERIFY0(dsl_destroy_head(name)); 3201248571Smm } 3202168404Spjd return (0); 3203168404Spjd} 3204168404Spjd 3205219089Spjdstatic boolean_t 3206219089Spjdztest_snapshot_create(char *osname, uint64_t id) 3207168404Spjd{ 3208219089Spjd char snapname[MAXNAMELEN]; 3209219089Spjd int error; 3210168404Spjd 3211248571Smm (void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id); 3212168404Spjd 3213248571Smm error = dmu_objset_snapshot_one(osname, snapname); 3214219089Spjd if (error == ENOSPC) { 3215219089Spjd ztest_record_enospc(FTAG); 3216219089Spjd return (B_FALSE); 3217219089Spjd } 3218248571Smm if (error != 0 && error != EEXIST) { 3219248571Smm fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname, 3220248571Smm snapname, error); 3221248571Smm } 3222219089Spjd return (B_TRUE); 3223219089Spjd} 3224168404Spjd 3225219089Spjdstatic boolean_t 3226219089Spjdztest_snapshot_destroy(char *osname, uint64_t id) 3227219089Spjd{ 3228219089Spjd char snapname[MAXNAMELEN]; 3229219089Spjd int error; 3230219089Spjd 3231219089Spjd (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, 3232219089Spjd (u_longlong_t)id); 3233219089Spjd 3234248571Smm error = dsl_destroy_snapshot(snapname, B_FALSE); 3235219089Spjd if (error != 0 && error != ENOENT) 3236219089Spjd fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); 3237219089Spjd return (B_TRUE); 3238168404Spjd} 3239168404Spjd 3240219089Spjd/* ARGSUSED */ 3241168404Spjdvoid 3242219089Spjdztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) 3243168404Spjd{ 3244219089Spjd ztest_ds_t zdtmp; 3245219089Spjd int iters; 3246168404Spjd int error; 3247185029Spjd objset_t *os, *os2; 3248219089Spjd char name[MAXNAMELEN]; 3249168404Spjd zilog_t *zilog; 3250168404Spjd 3251236143Smm (void) rw_rdlock(&ztest_name_lock); 3252168404Spjd 3253219089Spjd (void) 
snprintf(name, MAXNAMELEN, "%s/temp_%llu", 3254236143Smm ztest_opts.zo_pool, (u_longlong_t)id); 3255168404Spjd 3256168404Spjd /* 3257168404Spjd * If this dataset exists from a previous run, process its replay log 3258168404Spjd * half of the time. If we don't replay it, then dmu_objset_destroy() 3259219089Spjd * (invoked from ztest_objset_destroy_cb()) should just throw it away. 3260168404Spjd */ 3261168404Spjd if (ztest_random(2) == 0 && 3262219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { 3263236143Smm ztest_zd_init(&zdtmp, NULL, os); 3264219089Spjd zil_replay(os, &zdtmp, ztest_replay_vector); 3265219089Spjd ztest_zd_fini(&zdtmp); 3266219089Spjd dmu_objset_disown(os, FTAG); 3267168404Spjd } 3268168404Spjd 3269168404Spjd /* 3270168404Spjd * There may be an old instance of the dataset we're about to 3271168404Spjd * create lying around from a previous run. If so, destroy it 3272168404Spjd * and all of its snapshots. 3273168404Spjd */ 3274219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 3275168404Spjd DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 3276168404Spjd 3277168404Spjd /* 3278168404Spjd * Verify that the destroyed dataset is no longer in the namespace. 3279168404Spjd */ 3280248571Smm VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, 3281248571Smm FTAG, &os)); 3282168404Spjd 3283168404Spjd /* 3284168404Spjd * Verify that we can create a new dataset. 
3285168404Spjd */ 3286219089Spjd error = ztest_dataset_create(name); 3287168404Spjd if (error) { 3288168404Spjd if (error == ENOSPC) { 3289219089Spjd ztest_record_enospc(FTAG); 3290236143Smm (void) rw_unlock(&ztest_name_lock); 3291168404Spjd return; 3292168404Spjd } 3293168404Spjd fatal(0, "dmu_objset_create(%s) = %d", name, error); 3294168404Spjd } 3295168404Spjd 3296248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); 3297168404Spjd 3298236143Smm ztest_zd_init(&zdtmp, NULL, os); 3299219089Spjd 3300168404Spjd /* 3301168404Spjd * Open the intent log for it. 3302168404Spjd */ 3303219089Spjd zilog = zil_open(os, ztest_get_data); 3304168404Spjd 3305168404Spjd /* 3306219089Spjd * Put some objects in there, do a little I/O to them, 3307219089Spjd * and randomly take a couple of snapshots along the way. 3308168404Spjd */ 3309219089Spjd iters = ztest_random(5); 3310219089Spjd for (int i = 0; i < iters; i++) { 3311219089Spjd ztest_dmu_object_alloc_free(&zdtmp, id); 3312219089Spjd if (ztest_random(iters) == 0) 3313219089Spjd (void) ztest_snapshot_create(name, i); 3314168404Spjd } 3315168404Spjd 3316168404Spjd /* 3317168404Spjd * Verify that we cannot create an existing dataset. 3318168404Spjd */ 3319219089Spjd VERIFY3U(EEXIST, ==, 3320219089Spjd dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); 3321168404Spjd 3322168404Spjd /* 3323219089Spjd * Verify that we can hold an objset that is also owned. 3324168404Spjd */ 3325219089Spjd VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); 3326219089Spjd dmu_objset_rele(os2, FTAG); 3327168404Spjd 3328219089Spjd /* 3329219089Spjd * Verify that we cannot own an objset that is already owned. 
3330219089Spjd */ 3331219089Spjd VERIFY3U(EBUSY, ==, 3332219089Spjd dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); 3333219089Spjd 3334168404Spjd zil_close(zilog); 3335219089Spjd dmu_objset_disown(os, FTAG); 3336219089Spjd ztest_zd_fini(&zdtmp); 3337168404Spjd 3338236143Smm (void) rw_unlock(&ztest_name_lock); 3339168404Spjd} 3340168404Spjd 3341168404Spjd/* 3342168404Spjd * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 3343168404Spjd */ 3344168404Spjdvoid 3345219089Spjdztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) 3346168404Spjd{ 3347236143Smm (void) rw_rdlock(&ztest_name_lock); 3348219089Spjd (void) ztest_snapshot_destroy(zd->zd_name, id); 3349219089Spjd (void) ztest_snapshot_create(zd->zd_name, id); 3350236143Smm (void) rw_unlock(&ztest_name_lock); 3351219089Spjd} 3352219089Spjd 3353219089Spjd/* 3354219089Spjd * Cleanup non-standard snapshots and clones. 3355219089Spjd */ 3356219089Spjdvoid 3357219089Spjdztest_dsl_dataset_cleanup(char *osname, uint64_t id) 3358219089Spjd{ 3359219089Spjd char snap1name[MAXNAMELEN]; 3360219089Spjd char clone1name[MAXNAMELEN]; 3361219089Spjd char snap2name[MAXNAMELEN]; 3362219089Spjd char clone2name[MAXNAMELEN]; 3363219089Spjd char snap3name[MAXNAMELEN]; 3364168404Spjd int error; 3365168404Spjd 3366219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3367219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3368219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3369219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3370219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3371168404Spjd 3372248571Smm error = dsl_destroy_head(clone2name); 3373219089Spjd if (error && error != ENOENT) 3374248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error); 3375248571Smm error = dsl_destroy_snapshot(snap3name, B_FALSE); 3376219089Spjd if (error && error != 
ENOENT) 3377248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error); 3378248571Smm error = dsl_destroy_snapshot(snap2name, B_FALSE); 3379219089Spjd if (error && error != ENOENT) 3380248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error); 3381248571Smm error = dsl_destroy_head(clone1name); 3382219089Spjd if (error && error != ENOENT) 3383248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error); 3384248571Smm error = dsl_destroy_snapshot(snap1name, B_FALSE); 3385219089Spjd if (error && error != ENOENT) 3386248571Smm fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error); 3387168404Spjd} 3388168404Spjd 3389168404Spjd/* 3390207910Smm * Verify dsl_dataset_promote handles EBUSY 3391207910Smm */ 3392207910Smmvoid 3393219089Spjdztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) 3394207910Smm{ 3395248571Smm objset_t *os; 3396219089Spjd char snap1name[MAXNAMELEN]; 3397219089Spjd char clone1name[MAXNAMELEN]; 3398219089Spjd char snap2name[MAXNAMELEN]; 3399219089Spjd char clone2name[MAXNAMELEN]; 3400219089Spjd char snap3name[MAXNAMELEN]; 3401219089Spjd char *osname = zd->zd_name; 3402219089Spjd int error; 3403207910Smm 3404236143Smm (void) rw_rdlock(&ztest_name_lock); 3405207910Smm 3406219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3407207910Smm 3408219089Spjd (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, id); 3409219089Spjd (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, id); 3410219089Spjd (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, id); 3411219089Spjd (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id); 3412219089Spjd (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id); 3413207910Smm 3414248571Smm error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); 3415209962Smm if (error && error != EEXIST) { 3416209962Smm if (error == ENOSPC) { 3417209962Smm ztest_record_enospc(FTAG); 3418209962Smm goto out; 3419209962Smm } 3420209962Smm 
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); 3421209962Smm } 3422207910Smm 3423248571Smm error = dmu_objset_clone(clone1name, snap1name); 3424209962Smm if (error) { 3425209962Smm if (error == ENOSPC) { 3426209962Smm ztest_record_enospc(FTAG); 3427209962Smm goto out; 3428209962Smm } 3429207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); 3430209962Smm } 3431207910Smm 3432248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); 3433209962Smm if (error && error != EEXIST) { 3434209962Smm if (error == ENOSPC) { 3435209962Smm ztest_record_enospc(FTAG); 3436209962Smm goto out; 3437209962Smm } 3438209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); 3439209962Smm } 3440207910Smm 3441248571Smm error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); 3442209962Smm if (error && error != EEXIST) { 3443209962Smm if (error == ENOSPC) { 3444209962Smm ztest_record_enospc(FTAG); 3445209962Smm goto out; 3446209962Smm } 3447209962Smm fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); 3448209962Smm } 3449207910Smm 3450248571Smm error = dmu_objset_clone(clone2name, snap3name); 3451209962Smm if (error) { 3452209962Smm if (error == ENOSPC) { 3453219089Spjd ztest_record_enospc(FTAG); 3454209962Smm goto out; 3455209962Smm } 3456207910Smm fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); 3457209962Smm } 3458207910Smm 3459248571Smm error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); 3460207910Smm if (error) 3461248571Smm fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); 3462219089Spjd error = dsl_dataset_promote(clone2name, NULL); 3463207910Smm if (error != EBUSY) 3464207910Smm fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, 3465207910Smm error); 3466248571Smm dmu_objset_disown(os, FTAG); 3467207910Smm 3468209962Smmout: 3469219089Spjd ztest_dsl_dataset_cleanup(osname, id); 3470207910Smm 3471236143Smm (void) rw_unlock(&ztest_name_lock); 3472207910Smm} 
3473207910Smm 3474207910Smm/* 3475168404Spjd * Verify that dmu_object_{alloc,free} work as expected. 3476168404Spjd */ 3477168404Spjdvoid 3478219089Spjdztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) 3479168404Spjd{ 3480219089Spjd ztest_od_t od[4]; 3481219089Spjd int batchsize = sizeof (od) / sizeof (od[0]); 3482168404Spjd 3483219089Spjd for (int b = 0; b < batchsize; b++) 3484219089Spjd ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); 3485168404Spjd 3486168404Spjd /* 3487219089Spjd * Destroy the previous batch of objects, create a new batch, 3488219089Spjd * and do some I/O on the new objects. 3489168404Spjd */ 3490219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0) 3491219089Spjd return; 3492168404Spjd 3493219089Spjd while (ztest_random(4 * batchsize) != 0) 3494219089Spjd ztest_io(zd, od[ztest_random(batchsize)].od_object, 3495219089Spjd ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3496168404Spjd} 3497168404Spjd 3498168404Spjd/* 3499168404Spjd * Verify that dmu_{read,write} work as expected. 3500168404Spjd */ 3501168404Spjdvoid 3502219089Spjdztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) 3503168404Spjd{ 3504219089Spjd objset_t *os = zd->zd_os; 3505219089Spjd ztest_od_t od[2]; 3506168404Spjd dmu_tx_t *tx; 3507168404Spjd int i, freeit, error; 3508168404Spjd uint64_t n, s, txg; 3509168404Spjd bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; 3510219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3511219089Spjd uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); 3512168404Spjd uint64_t regions = 997; 3513168404Spjd uint64_t stride = 123456789ULL; 3514168404Spjd uint64_t width = 40; 3515168404Spjd int free_percent = 5; 3516168404Spjd 3517168404Spjd /* 3518168404Spjd * This test uses two objects, packobj and bigobj, that are always 3519168404Spjd * updated together (i.e. in the same tx) so that their contents are 3520168404Spjd * in sync and can be compared. 
Their contents relate to each other 3521168404Spjd * in a simple way: packobj is a dense array of 'bufwad' structures, 3522168404Spjd * while bigobj is a sparse array of the same bufwads. Specifically, 3523168404Spjd * for any index n, there are three bufwads that should be identical: 3524168404Spjd * 3525168404Spjd * packobj, at offset n * sizeof (bufwad_t) 3526168404Spjd * bigobj, at the head of the nth chunk 3527168404Spjd * bigobj, at the tail of the nth chunk 3528168404Spjd * 3529168404Spjd * The chunk size is arbitrary. It doesn't have to be a power of two, 3530168404Spjd * and it doesn't have any relation to the object blocksize. 3531168404Spjd * The only requirement is that it can hold at least two bufwads. 3532168404Spjd * 3533168404Spjd * Normally, we write the bufwad to each of these locations. 3534168404Spjd * However, free_percent of the time we instead write zeroes to 3535168404Spjd * packobj and perform a dmu_free_range() on bigobj. By comparing 3536168404Spjd * bigobj to packobj, we can verify that the DMU is correctly 3537168404Spjd * tracking which parts of an object are allocated and free, 3538168404Spjd * and that the contents of the allocated blocks are correct. 3539168404Spjd */ 3540168404Spjd 3541168404Spjd /* 3542168404Spjd * Read the directory info. If it's the first time, set things up. 3543168404Spjd */ 3544219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); 3545219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3546168404Spjd 3547219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3548219089Spjd return; 3549168404Spjd 3550219089Spjd bigobj = od[0].od_object; 3551219089Spjd packobj = od[1].od_object; 3552219089Spjd chunksize = od[0].od_gen; 3553219089Spjd ASSERT(chunksize == od[1].od_gen); 3554168404Spjd 3555168404Spjd /* 3556168404Spjd * Prefetch a random chunk of the big object. 
3557168404Spjd * Our aim here is to get some async reads in flight 3558168404Spjd * for blocks that we may free below; the DMU should 3559168404Spjd * handle this race correctly. 3560168404Spjd */ 3561168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3562168404Spjd s = 1 + ztest_random(2 * width - 1); 3563219089Spjd dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); 3564168404Spjd 3565168404Spjd /* 3566168404Spjd * Pick a random index and compute the offsets into packobj and bigobj. 3567168404Spjd */ 3568168404Spjd n = ztest_random(regions) * stride + ztest_random(width); 3569168404Spjd s = 1 + ztest_random(width - 1); 3570168404Spjd 3571168404Spjd packoff = n * sizeof (bufwad_t); 3572168404Spjd packsize = s * sizeof (bufwad_t); 3573168404Spjd 3574219089Spjd bigoff = n * chunksize; 3575219089Spjd bigsize = s * chunksize; 3576168404Spjd 3577168404Spjd packbuf = umem_alloc(packsize, UMEM_NOFAIL); 3578168404Spjd bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); 3579168404Spjd 3580168404Spjd /* 3581168404Spjd * free_percent of the time, free a range of bigobj rather than 3582168404Spjd * overwriting it. 3583168404Spjd */ 3584168404Spjd freeit = (ztest_random(100) < free_percent); 3585168404Spjd 3586168404Spjd /* 3587168404Spjd * Read the current contents of our objects. 3588168404Spjd */ 3589219089Spjd error = dmu_read(os, packobj, packoff, packsize, packbuf, 3590209962Smm DMU_READ_PREFETCH); 3591240415Smm ASSERT0(error); 3592219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, 3593209962Smm DMU_READ_PREFETCH); 3594240415Smm ASSERT0(error); 3595168404Spjd 3596168404Spjd /* 3597168404Spjd * Get a tx for the mods to both packobj and bigobj. 
3598168404Spjd */ 3599168404Spjd tx = dmu_tx_create(os); 3600168404Spjd 3601219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3602168404Spjd 3603168404Spjd if (freeit) 3604219089Spjd dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); 3605168404Spjd else 3606219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3607168404Spjd 3608219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3609219089Spjd if (txg == 0) { 3610168404Spjd umem_free(packbuf, packsize); 3611168404Spjd umem_free(bigbuf, bigsize); 3612168404Spjd return; 3613168404Spjd } 3614168404Spjd 3615219089Spjd dmu_object_set_checksum(os, bigobj, 3616219089Spjd (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); 3617168404Spjd 3618219089Spjd dmu_object_set_compress(os, bigobj, 3619219089Spjd (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); 3620219089Spjd 3621168404Spjd /* 3622168404Spjd * For each index from n to n + s, verify that the existing bufwad 3623168404Spjd * in packobj matches the bufwads at the head and tail of the 3624168404Spjd * corresponding chunk in bigobj. Then update all three bufwads 3625168404Spjd * with the new values we want to write out. 
3626168404Spjd */ 3627168404Spjd for (i = 0; i < s; i++) { 3628168404Spjd /* LINTED */ 3629168404Spjd pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3630168404Spjd /* LINTED */ 3631219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3632168404Spjd /* LINTED */ 3633219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3634168404Spjd 3635168404Spjd ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3636168404Spjd ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3637168404Spjd 3638168404Spjd if (pack->bw_txg > txg) 3639168404Spjd fatal(0, "future leak: got %llx, open txg is %llx", 3640168404Spjd pack->bw_txg, txg); 3641168404Spjd 3642168404Spjd if (pack->bw_data != 0 && pack->bw_index != n + i) 3643168404Spjd fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3644168404Spjd pack->bw_index, n, i); 3645168404Spjd 3646168404Spjd if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3647168404Spjd fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3648168404Spjd 3649168404Spjd if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3650168404Spjd fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3651168404Spjd 3652168404Spjd if (freeit) { 3653168404Spjd bzero(pack, sizeof (bufwad_t)); 3654168404Spjd } else { 3655168404Spjd pack->bw_index = n + i; 3656168404Spjd pack->bw_txg = txg; 3657168404Spjd pack->bw_data = 1 + ztest_random(-2ULL); 3658168404Spjd } 3659168404Spjd *bigH = *pack; 3660168404Spjd *bigT = *pack; 3661168404Spjd } 3662168404Spjd 3663168404Spjd /* 3664168404Spjd * We've verified all the old bufwads, and made new ones. 3665168404Spjd * Now write them out. 
3666168404Spjd */ 3667219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3668168404Spjd 3669168404Spjd if (freeit) { 3670236143Smm if (ztest_opts.zo_verbose >= 7) { 3671168404Spjd (void) printf("freeing offset %llx size %llx" 3672168404Spjd " txg %llx\n", 3673168404Spjd (u_longlong_t)bigoff, 3674168404Spjd (u_longlong_t)bigsize, 3675168404Spjd (u_longlong_t)txg); 3676168404Spjd } 3677219089Spjd VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); 3678168404Spjd } else { 3679236143Smm if (ztest_opts.zo_verbose >= 7) { 3680168404Spjd (void) printf("writing offset %llx size %llx" 3681168404Spjd " txg %llx\n", 3682168404Spjd (u_longlong_t)bigoff, 3683168404Spjd (u_longlong_t)bigsize, 3684168404Spjd (u_longlong_t)txg); 3685168404Spjd } 3686219089Spjd dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); 3687168404Spjd } 3688168404Spjd 3689168404Spjd dmu_tx_commit(tx); 3690168404Spjd 3691168404Spjd /* 3692168404Spjd * Sanity check the stuff we just wrote. 3693168404Spjd */ 3694168404Spjd { 3695168404Spjd void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3696168404Spjd void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3697168404Spjd 3698219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3699209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3700219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3701209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3702168404Spjd 3703168404Spjd ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3704168404Spjd ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3705168404Spjd 3706168404Spjd umem_free(packcheck, packsize); 3707168404Spjd umem_free(bigcheck, bigsize); 3708168404Spjd } 3709168404Spjd 3710168404Spjd umem_free(packbuf, packsize); 3711168404Spjd umem_free(bigbuf, bigsize); 3712168404Spjd} 3713168404Spjd 3714168404Spjdvoid 3715209962Smmcompare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, 3716219089Spjd uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) 3717209962Smm{ 
3718209962Smm uint64_t i; 3719209962Smm bufwad_t *pack; 3720209962Smm bufwad_t *bigH; 3721209962Smm bufwad_t *bigT; 3722209962Smm 3723209962Smm /* 3724209962Smm * For each index from n to n + s, verify that the existing bufwad 3725209962Smm * in packobj matches the bufwads at the head and tail of the 3726209962Smm * corresponding chunk in bigobj. Then update all three bufwads 3727209962Smm * with the new values we want to write out. 3728209962Smm */ 3729209962Smm for (i = 0; i < s; i++) { 3730209962Smm /* LINTED */ 3731209962Smm pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); 3732209962Smm /* LINTED */ 3733219089Spjd bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); 3734209962Smm /* LINTED */ 3735219089Spjd bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; 3736209962Smm 3737209962Smm ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); 3738209962Smm ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); 3739209962Smm 3740209962Smm if (pack->bw_txg > txg) 3741209962Smm fatal(0, "future leak: got %llx, open txg is %llx", 3742209962Smm pack->bw_txg, txg); 3743209962Smm 3744209962Smm if (pack->bw_data != 0 && pack->bw_index != n + i) 3745209962Smm fatal(0, "wrong index: got %llx, wanted %llx+%llx", 3746209962Smm pack->bw_index, n, i); 3747209962Smm 3748209962Smm if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) 3749209962Smm fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); 3750209962Smm 3751209962Smm if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) 3752209962Smm fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); 3753209962Smm 3754209962Smm pack->bw_index = n + i; 3755209962Smm pack->bw_txg = txg; 3756209962Smm pack->bw_data = 1 + ztest_random(-2ULL); 3757209962Smm 3758209962Smm *bigH = *pack; 3759209962Smm *bigT = *pack; 3760209962Smm } 3761209962Smm} 3762209962Smm 3763209962Smmvoid 3764219089Spjdztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) 3765209962Smm{ 3766219089Spjd objset_t *os = zd->zd_os; 3767219089Spjd ztest_od_t od[2]; 
3768209962Smm dmu_tx_t *tx; 3769209962Smm uint64_t i; 3770209962Smm int error; 3771209962Smm uint64_t n, s, txg; 3772209962Smm bufwad_t *packbuf, *bigbuf; 3773219089Spjd uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; 3774219089Spjd uint64_t blocksize = ztest_random_blocksize(); 3775219089Spjd uint64_t chunksize = blocksize; 3776209962Smm uint64_t regions = 997; 3777209962Smm uint64_t stride = 123456789ULL; 3778209962Smm uint64_t width = 9; 3779209962Smm dmu_buf_t *bonus_db; 3780209962Smm arc_buf_t **bigbuf_arcbufs; 3781219089Spjd dmu_object_info_t doi; 3782209962Smm 3783209962Smm /* 3784209962Smm * This test uses two objects, packobj and bigobj, that are always 3785209962Smm * updated together (i.e. in the same tx) so that their contents are 3786209962Smm * in sync and can be compared. Their contents relate to each other 3787209962Smm * in a simple way: packobj is a dense array of 'bufwad' structures, 3788209962Smm * while bigobj is a sparse array of the same bufwads. Specifically, 3789209962Smm * for any index n, there are three bufwads that should be identical: 3790209962Smm * 3791209962Smm * packobj, at offset n * sizeof (bufwad_t) 3792209962Smm * bigobj, at the head of the nth chunk 3793209962Smm * bigobj, at the tail of the nth chunk 3794209962Smm * 3795209962Smm * The chunk size is set equal to bigobj block size so that 3796209962Smm * dmu_assign_arcbuf() can be tested for object updates. 3797209962Smm */ 3798209962Smm 3799209962Smm /* 3800209962Smm * Read the directory info. If it's the first time, set things up. 
3801209962Smm */ 3802219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 3803219089Spjd ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); 3804209962Smm 3805219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 3806219089Spjd return; 3807209962Smm 3808219089Spjd bigobj = od[0].od_object; 3809219089Spjd packobj = od[1].od_object; 3810219089Spjd blocksize = od[0].od_blocksize; 3811219089Spjd chunksize = blocksize; 3812219089Spjd ASSERT(chunksize == od[1].od_gen); 3813209962Smm 3814219089Spjd VERIFY(dmu_object_info(os, bigobj, &doi) == 0); 3815219089Spjd VERIFY(ISP2(doi.doi_data_block_size)); 3816219089Spjd VERIFY(chunksize == doi.doi_data_block_size); 3817219089Spjd VERIFY(chunksize >= 2 * sizeof (bufwad_t)); 3818209962Smm 3819209962Smm /* 3820209962Smm * Pick a random index and compute the offsets into packobj and bigobj. 3821209962Smm */ 3822209962Smm n = ztest_random(regions) * stride + ztest_random(width); 3823209962Smm s = 1 + ztest_random(width - 1); 3824209962Smm 3825209962Smm packoff = n * sizeof (bufwad_t); 3826209962Smm packsize = s * sizeof (bufwad_t); 3827209962Smm 3828219089Spjd bigoff = n * chunksize; 3829219089Spjd bigsize = s * chunksize; 3830209962Smm 3831209962Smm packbuf = umem_zalloc(packsize, UMEM_NOFAIL); 3832209962Smm bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); 3833209962Smm 3834219089Spjd VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); 3835209962Smm 3836209962Smm bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); 3837209962Smm 3838209962Smm /* 3839209962Smm * Iteration 0 test zcopy for DB_UNCACHED dbufs. 3840209962Smm * Iteration 1 test zcopy to already referenced dbufs. 3841209962Smm * Iteration 2 test zcopy to dirty dbuf in the same txg. 3842209962Smm * Iteration 3 test zcopy to dbuf dirty in previous txg. 3843209962Smm * Iteration 4 test zcopy when dbuf is no longer dirty. 3844209962Smm * Iteration 5 test zcopy when it can't be done. 
3845209962Smm * Iteration 6 one more zcopy write. 3846209962Smm */ 3847209962Smm for (i = 0; i < 7; i++) { 3848209962Smm uint64_t j; 3849209962Smm uint64_t off; 3850209962Smm 3851209962Smm /* 3852209962Smm * In iteration 5 (i == 5) use arcbufs 3853209962Smm * that don't match bigobj blksz to test 3854209962Smm * dmu_assign_arcbuf() when it can't directly 3855209962Smm * assign an arcbuf to a dbuf. 3856209962Smm */ 3857209962Smm for (j = 0; j < s; j++) { 3858209962Smm if (i != 5) { 3859209962Smm bigbuf_arcbufs[j] = 3860219089Spjd dmu_request_arcbuf(bonus_db, chunksize); 3861209962Smm } else { 3862209962Smm bigbuf_arcbufs[2 * j] = 3863219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3864209962Smm bigbuf_arcbufs[2 * j + 1] = 3865219089Spjd dmu_request_arcbuf(bonus_db, chunksize / 2); 3866209962Smm } 3867209962Smm } 3868209962Smm 3869209962Smm /* 3870209962Smm * Get a tx for the mods to both packobj and bigobj. 3871209962Smm */ 3872209962Smm tx = dmu_tx_create(os); 3873209962Smm 3874219089Spjd dmu_tx_hold_write(tx, packobj, packoff, packsize); 3875219089Spjd dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); 3876209962Smm 3877219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 3878219089Spjd if (txg == 0) { 3879209962Smm umem_free(packbuf, packsize); 3880209962Smm umem_free(bigbuf, bigsize); 3881209962Smm for (j = 0; j < s; j++) { 3882209962Smm if (i != 5) { 3883209962Smm dmu_return_arcbuf(bigbuf_arcbufs[j]); 3884209962Smm } else { 3885209962Smm dmu_return_arcbuf( 3886209962Smm bigbuf_arcbufs[2 * j]); 3887209962Smm dmu_return_arcbuf( 3888209962Smm bigbuf_arcbufs[2 * j + 1]); 3889209962Smm } 3890209962Smm } 3891209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3892209962Smm dmu_buf_rele(bonus_db, FTAG); 3893209962Smm return; 3894209962Smm } 3895209962Smm 3896209962Smm /* 3897209962Smm * 50% of the time don't read objects in the 1st iteration to 3898209962Smm * test dmu_assign_arcbuf() for the case when there're no 3899209962Smm * existing 
dbufs for the specified offsets. 3900209962Smm */ 3901209962Smm if (i != 0 || ztest_random(2) != 0) { 3902219089Spjd error = dmu_read(os, packobj, packoff, 3903209962Smm packsize, packbuf, DMU_READ_PREFETCH); 3904240415Smm ASSERT0(error); 3905219089Spjd error = dmu_read(os, bigobj, bigoff, bigsize, 3906209962Smm bigbuf, DMU_READ_PREFETCH); 3907240415Smm ASSERT0(error); 3908209962Smm } 3909209962Smm compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, 3910219089Spjd n, chunksize, txg); 3911209962Smm 3912209962Smm /* 3913209962Smm * We've verified all the old bufwads, and made new ones. 3914209962Smm * Now write them out. 3915209962Smm */ 3916219089Spjd dmu_write(os, packobj, packoff, packsize, packbuf, tx); 3917236143Smm if (ztest_opts.zo_verbose >= 7) { 3918209962Smm (void) printf("writing offset %llx size %llx" 3919209962Smm " txg %llx\n", 3920209962Smm (u_longlong_t)bigoff, 3921209962Smm (u_longlong_t)bigsize, 3922209962Smm (u_longlong_t)txg); 3923209962Smm } 3924219089Spjd for (off = bigoff, j = 0; j < s; j++, off += chunksize) { 3925209962Smm dmu_buf_t *dbt; 3926209962Smm if (i != 5) { 3927209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3928219089Spjd bigbuf_arcbufs[j]->b_data, chunksize); 3929209962Smm } else { 3930209962Smm bcopy((caddr_t)bigbuf + (off - bigoff), 3931209962Smm bigbuf_arcbufs[2 * j]->b_data, 3932219089Spjd chunksize / 2); 3933209962Smm bcopy((caddr_t)bigbuf + (off - bigoff) + 3934219089Spjd chunksize / 2, 3935209962Smm bigbuf_arcbufs[2 * j + 1]->b_data, 3936219089Spjd chunksize / 2); 3937209962Smm } 3938209962Smm 3939209962Smm if (i == 1) { 3940219089Spjd VERIFY(dmu_buf_hold(os, bigobj, off, 3941219089Spjd FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); 3942209962Smm } 3943209962Smm if (i != 5) { 3944209962Smm dmu_assign_arcbuf(bonus_db, off, 3945209962Smm bigbuf_arcbufs[j], tx); 3946209962Smm } else { 3947209962Smm dmu_assign_arcbuf(bonus_db, off, 3948209962Smm bigbuf_arcbufs[2 * j], tx); 3949209962Smm dmu_assign_arcbuf(bonus_db, 3950219089Spjd 
off + chunksize / 2, 3951209962Smm bigbuf_arcbufs[2 * j + 1], tx); 3952209962Smm } 3953209962Smm if (i == 1) { 3954209962Smm dmu_buf_rele(dbt, FTAG); 3955209962Smm } 3956209962Smm } 3957209962Smm dmu_tx_commit(tx); 3958209962Smm 3959209962Smm /* 3960209962Smm * Sanity check the stuff we just wrote. 3961209962Smm */ 3962209962Smm { 3963209962Smm void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); 3964209962Smm void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); 3965209962Smm 3966219089Spjd VERIFY(0 == dmu_read(os, packobj, packoff, 3967209962Smm packsize, packcheck, DMU_READ_PREFETCH)); 3968219089Spjd VERIFY(0 == dmu_read(os, bigobj, bigoff, 3969209962Smm bigsize, bigcheck, DMU_READ_PREFETCH)); 3970209962Smm 3971209962Smm ASSERT(bcmp(packbuf, packcheck, packsize) == 0); 3972209962Smm ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); 3973209962Smm 3974209962Smm umem_free(packcheck, packsize); 3975209962Smm umem_free(bigcheck, bigsize); 3976209962Smm } 3977209962Smm if (i == 2) { 3978209962Smm txg_wait_open(dmu_objset_pool(os), 0); 3979209962Smm } else if (i == 3) { 3980209962Smm txg_wait_synced(dmu_objset_pool(os), 0); 3981209962Smm } 3982209962Smm } 3983209962Smm 3984209962Smm dmu_buf_rele(bonus_db, FTAG); 3985209962Smm umem_free(packbuf, packsize); 3986209962Smm umem_free(bigbuf, bigsize); 3987209962Smm umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); 3988209962Smm} 3989209962Smm 3990219089Spjd/* ARGSUSED */ 3991209962Smmvoid 3992219089Spjdztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) 3993168404Spjd{ 3994219089Spjd ztest_od_t od[1]; 3995219089Spjd uint64_t offset = (1ULL << (ztest_random(20) + 43)) + 3996219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 3997168404Spjd 3998168404Spjd /* 3999219089Spjd * Have multiple threads write to large offsets in an object 4000219089Spjd * to verify that parallel writes to an object -- even to the 4001219089Spjd * same blocks within the object -- doesn't cause any trouble. 
4002168404Spjd */ 4003219089Spjd ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4004219089Spjd 4005219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4006219089Spjd return; 4007219089Spjd 4008219089Spjd while (ztest_random(10) != 0) 4009219089Spjd ztest_io(zd, od[0].od_object, offset); 4010168404Spjd} 4011168404Spjd 4012168404Spjdvoid 4013219089Spjdztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) 4014168404Spjd{ 4015219089Spjd ztest_od_t od[1]; 4016219089Spjd uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + 4017219089Spjd (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); 4018219089Spjd uint64_t count = ztest_random(20) + 1; 4019219089Spjd uint64_t blocksize = ztest_random_blocksize(); 4020219089Spjd void *data; 4021168404Spjd 4022219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4023168404Spjd 4024219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4025185029Spjd return; 4026168404Spjd 4027219089Spjd if (ztest_truncate(zd, od[0].od_object, offset, count * blocksize) != 0) 4028185029Spjd return; 4029168404Spjd 4030219089Spjd ztest_prealloc(zd, od[0].od_object, offset, count * blocksize); 4031185029Spjd 4032219089Spjd data = umem_zalloc(blocksize, UMEM_NOFAIL); 4033185029Spjd 4034219089Spjd while (ztest_random(count) != 0) { 4035219089Spjd uint64_t randoff = offset + (ztest_random(count) * blocksize); 4036219089Spjd if (ztest_write(zd, od[0].od_object, randoff, blocksize, 4037219089Spjd data) != 0) 4038219089Spjd break; 4039219089Spjd while (ztest_random(4) != 0) 4040219089Spjd ztest_io(zd, od[0].od_object, randoff); 4041185029Spjd } 4042168404Spjd 4043219089Spjd umem_free(data, blocksize); 4044168404Spjd} 4045168404Spjd 4046168404Spjd/* 4047168404Spjd * Verify that zap_{create,destroy,add,remove,update} work as expected. 
4048168404Spjd */ 4049168404Spjd#define ZTEST_ZAP_MIN_INTS 1 4050168404Spjd#define ZTEST_ZAP_MAX_INTS 4 4051168404Spjd#define ZTEST_ZAP_MAX_PROPS 1000 4052168404Spjd 4053168404Spjdvoid 4054219089Spjdztest_zap(ztest_ds_t *zd, uint64_t id) 4055168404Spjd{ 4056219089Spjd objset_t *os = zd->zd_os; 4057219089Spjd ztest_od_t od[1]; 4058168404Spjd uint64_t object; 4059168404Spjd uint64_t txg, last_txg; 4060168404Spjd uint64_t value[ZTEST_ZAP_MAX_INTS]; 4061168404Spjd uint64_t zl_ints, zl_intsize, prop; 4062168404Spjd int i, ints; 4063168404Spjd dmu_tx_t *tx; 4064168404Spjd char propname[100], txgname[100]; 4065168404Spjd int error; 4066168404Spjd char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; 4067168404Spjd 4068219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4069168404Spjd 4070219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4071219089Spjd return; 4072219089Spjd 4073219089Spjd object = od[0].od_object; 4074219089Spjd 4075168404Spjd /* 4076219089Spjd * Generate a known hash collision, and verify that 4077219089Spjd * we can lookup and remove both entries. 
4078168404Spjd */ 4079219089Spjd tx = dmu_tx_create(os); 4080219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4081219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4082219089Spjd if (txg == 0) 4083219089Spjd return; 4084219089Spjd for (i = 0; i < 2; i++) { 4085219089Spjd value[i] = i; 4086219089Spjd VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 4087219089Spjd 1, &value[i], tx)); 4088168404Spjd } 4089219089Spjd for (i = 0; i < 2; i++) { 4090219089Spjd VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], 4091219089Spjd sizeof (uint64_t), 1, &value[i], tx)); 4092219089Spjd VERIFY3U(0, ==, 4093219089Spjd zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); 4094219089Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4095219089Spjd ASSERT3U(zl_ints, ==, 1); 4096219089Spjd } 4097219089Spjd for (i = 0; i < 2; i++) { 4098219089Spjd VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); 4099219089Spjd } 4100219089Spjd dmu_tx_commit(tx); 4101168404Spjd 4102219089Spjd /* 4103219089Spjd * Generate a buch of random entries. 4104219089Spjd */ 4105168404Spjd ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); 4106168404Spjd 4107185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4108185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4109185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4110185029Spjd bzero(value, sizeof (value)); 4111185029Spjd last_txg = 0; 4112168404Spjd 4113185029Spjd /* 4114185029Spjd * If these zap entries already exist, validate their contents. 
4115185029Spjd */ 4116185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4117185029Spjd if (error == 0) { 4118185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4119185029Spjd ASSERT3U(zl_ints, ==, 1); 4120168404Spjd 4121185029Spjd VERIFY(zap_lookup(os, object, txgname, zl_intsize, 4122185029Spjd zl_ints, &last_txg) == 0); 4123168404Spjd 4124185029Spjd VERIFY(zap_length(os, object, propname, &zl_intsize, 4125185029Spjd &zl_ints) == 0); 4126168404Spjd 4127185029Spjd ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); 4128185029Spjd ASSERT3U(zl_ints, ==, ints); 4129168404Spjd 4130185029Spjd VERIFY(zap_lookup(os, object, propname, zl_intsize, 4131185029Spjd zl_ints, value) == 0); 4132168404Spjd 4133185029Spjd for (i = 0; i < ints; i++) { 4134185029Spjd ASSERT3U(value[i], ==, last_txg + object + i); 4135168404Spjd } 4136185029Spjd } else { 4137185029Spjd ASSERT3U(error, ==, ENOENT); 4138185029Spjd } 4139168404Spjd 4140185029Spjd /* 4141185029Spjd * Atomically update two entries in our zap object. 4142185029Spjd * The first is named txg_%llu, and contains the txg 4143185029Spjd * in which the property was last updated. The second 4144185029Spjd * is named prop_%llu, and the nth element of its value 4145185029Spjd * should be txg + object + n. 
4146185029Spjd */ 4147185029Spjd tx = dmu_tx_create(os); 4148219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4149219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4150219089Spjd if (txg == 0) 4151185029Spjd return; 4152168404Spjd 4153185029Spjd if (last_txg > txg) 4154185029Spjd fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); 4155168404Spjd 4156185029Spjd for (i = 0; i < ints; i++) 4157185029Spjd value[i] = txg + object + i; 4158168404Spjd 4159219089Spjd VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 4160219089Spjd 1, &txg, tx)); 4161219089Spjd VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), 4162219089Spjd ints, value, tx)); 4163168404Spjd 4164185029Spjd dmu_tx_commit(tx); 4165168404Spjd 4166185029Spjd /* 4167185029Spjd * Remove a random pair of entries. 4168185029Spjd */ 4169185029Spjd prop = ztest_random(ZTEST_ZAP_MAX_PROPS); 4170185029Spjd (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); 4171185029Spjd (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); 4172168404Spjd 4173185029Spjd error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); 4174168404Spjd 4175185029Spjd if (error == ENOENT) 4176185029Spjd return; 4177168404Spjd 4178240415Smm ASSERT0(error); 4179168404Spjd 4180185029Spjd tx = dmu_tx_create(os); 4181219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4182219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4183219089Spjd if (txg == 0) 4184185029Spjd return; 4185219089Spjd VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); 4186219089Spjd VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); 4187185029Spjd dmu_tx_commit(tx); 4188168404Spjd} 4189168404Spjd 4190209962Smm/* 4191209962Smm * Testcase to test the upgrading of a microzap to fatzap. 
4192209962Smm */ 4193168404Spjdvoid 4194219089Spjdztest_fzap(ztest_ds_t *zd, uint64_t id) 4195209962Smm{ 4196219089Spjd objset_t *os = zd->zd_os; 4197219089Spjd ztest_od_t od[1]; 4198219089Spjd uint64_t object, txg; 4199209962Smm 4200219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); 4201209962Smm 4202219089Spjd if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0) 4203219089Spjd return; 4204209962Smm 4205219089Spjd object = od[0].od_object; 4206209962Smm 4207209962Smm /* 4208219089Spjd * Add entries to this ZAP and make sure it spills over 4209209962Smm * and gets upgraded to a fatzap. Also, since we are adding 4210219089Spjd * 2050 entries we should see ptrtbl growth and leaf-block split. 4211209962Smm */ 4212219089Spjd for (int i = 0; i < 2050; i++) { 4213219089Spjd char name[MAXNAMELEN]; 4214219089Spjd uint64_t value = i; 4215219089Spjd dmu_tx_t *tx; 4216219089Spjd int error; 4217209962Smm 4218219089Spjd (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", 4219219089Spjd id, value); 4220219089Spjd 4221209962Smm tx = dmu_tx_create(os); 4222219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, name); 4223219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4224219089Spjd if (txg == 0) 4225209962Smm return; 4226219089Spjd error = zap_add(os, object, name, sizeof (uint64_t), 1, 4227219089Spjd &value, tx); 4228209962Smm ASSERT(error == 0 || error == EEXIST); 4229209962Smm dmu_tx_commit(tx); 4230209962Smm } 4231209962Smm} 4232209962Smm 4233219089Spjd/* ARGSUSED */ 4234209962Smmvoid 4235219089Spjdztest_zap_parallel(ztest_ds_t *zd, uint64_t id) 4236168404Spjd{ 4237219089Spjd objset_t *os = zd->zd_os; 4238219089Spjd ztest_od_t od[1]; 4239168404Spjd uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; 4240168404Spjd dmu_tx_t *tx; 4241168404Spjd int i, namelen, error; 4242219089Spjd int micro = ztest_random(2); 4243168404Spjd char name[20], string_value[20]; 4244168404Spjd void *data; 4245168404Spjd 4246219089Spjd 
ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); 4247219089Spjd 4248219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4249219089Spjd return; 4250219089Spjd 4251219089Spjd object = od[0].od_object; 4252219089Spjd 4253185029Spjd /* 4254185029Spjd * Generate a random name of the form 'xxx.....' where each 4255185029Spjd * x is a random printable character and the dots are dots. 4256185029Spjd * There are 94 such characters, and the name length goes from 4257185029Spjd * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. 4258185029Spjd */ 4259185029Spjd namelen = ztest_random(sizeof (name) - 5) + 5 + 1; 4260168404Spjd 4261185029Spjd for (i = 0; i < 3; i++) 4262185029Spjd name[i] = '!' + ztest_random('~' - '!' + 1); 4263185029Spjd for (; i < namelen - 1; i++) 4264185029Spjd name[i] = '.'; 4265185029Spjd name[i] = '\0'; 4266168404Spjd 4267219089Spjd if ((namelen & 1) || micro) { 4268185029Spjd wsize = sizeof (txg); 4269185029Spjd wc = 1; 4270185029Spjd data = &txg; 4271185029Spjd } else { 4272185029Spjd wsize = 1; 4273185029Spjd wc = namelen; 4274185029Spjd data = string_value; 4275185029Spjd } 4276168404Spjd 4277185029Spjd count = -1ULL; 4278248571Smm VERIFY0(zap_count(os, object, &count)); 4279185029Spjd ASSERT(count != -1ULL); 4280168404Spjd 4281185029Spjd /* 4282185029Spjd * Select an operation: length, lookup, add, update, remove. 
4283185029Spjd */ 4284185029Spjd i = ztest_random(5); 4285168404Spjd 4286185029Spjd if (i >= 2) { 4287185029Spjd tx = dmu_tx_create(os); 4288219089Spjd dmu_tx_hold_zap(tx, object, B_TRUE, NULL); 4289219089Spjd txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); 4290219089Spjd if (txg == 0) 4291185029Spjd return; 4292185029Spjd bcopy(name, string_value, namelen); 4293185029Spjd } else { 4294185029Spjd tx = NULL; 4295185029Spjd txg = 0; 4296185029Spjd bzero(string_value, namelen); 4297185029Spjd } 4298168404Spjd 4299185029Spjd switch (i) { 4300168404Spjd 4301185029Spjd case 0: 4302185029Spjd error = zap_length(os, object, name, &zl_wsize, &zl_wc); 4303185029Spjd if (error == 0) { 4304185029Spjd ASSERT3U(wsize, ==, zl_wsize); 4305185029Spjd ASSERT3U(wc, ==, zl_wc); 4306185029Spjd } else { 4307185029Spjd ASSERT3U(error, ==, ENOENT); 4308185029Spjd } 4309185029Spjd break; 4310168404Spjd 4311185029Spjd case 1: 4312185029Spjd error = zap_lookup(os, object, name, wsize, wc, data); 4313185029Spjd if (error == 0) { 4314185029Spjd if (data == string_value && 4315185029Spjd bcmp(name, data, namelen) != 0) 4316185029Spjd fatal(0, "name '%s' != val '%s' len %d", 4317185029Spjd name, data, namelen); 4318185029Spjd } else { 4319185029Spjd ASSERT3U(error, ==, ENOENT); 4320185029Spjd } 4321185029Spjd break; 4322168404Spjd 4323185029Spjd case 2: 4324185029Spjd error = zap_add(os, object, name, wsize, wc, data, tx); 4325185029Spjd ASSERT(error == 0 || error == EEXIST); 4326185029Spjd break; 4327168404Spjd 4328185029Spjd case 3: 4329185029Spjd VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); 4330185029Spjd break; 4331168404Spjd 4332185029Spjd case 4: 4333185029Spjd error = zap_remove(os, object, name, tx); 4334185029Spjd ASSERT(error == 0 || error == ENOENT); 4335185029Spjd break; 4336185029Spjd } 4337168404Spjd 4338185029Spjd if (tx != NULL) 4339185029Spjd dmu_tx_commit(tx); 4340168404Spjd} 4341168404Spjd 4342219089Spjd/* 4343219089Spjd * Commit callback data. 
4344219089Spjd */ 4345219089Spjdtypedef struct ztest_cb_data { 4346219089Spjd list_node_t zcd_node; 4347219089Spjd uint64_t zcd_txg; 4348219089Spjd int zcd_expected_err; 4349219089Spjd boolean_t zcd_added; 4350219089Spjd boolean_t zcd_called; 4351219089Spjd spa_t *zcd_spa; 4352219089Spjd} ztest_cb_data_t; 4353219089Spjd 4354219089Spjd/* This is the actual commit callback function */ 4355219089Spjdstatic void 4356219089Spjdztest_commit_callback(void *arg, int error) 4357219089Spjd{ 4358219089Spjd ztest_cb_data_t *data = arg; 4359219089Spjd uint64_t synced_txg; 4360219089Spjd 4361219089Spjd VERIFY(data != NULL); 4362219089Spjd VERIFY3S(data->zcd_expected_err, ==, error); 4363219089Spjd VERIFY(!data->zcd_called); 4364219089Spjd 4365219089Spjd synced_txg = spa_last_synced_txg(data->zcd_spa); 4366219089Spjd if (data->zcd_txg > synced_txg) 4367219089Spjd fatal(0, "commit callback of txg %" PRIu64 " called prematurely" 4368219089Spjd ", last synced txg = %" PRIu64 "\n", data->zcd_txg, 4369219089Spjd synced_txg); 4370219089Spjd 4371219089Spjd data->zcd_called = B_TRUE; 4372219089Spjd 4373219089Spjd if (error == ECANCELED) { 4374240415Smm ASSERT0(data->zcd_txg); 4375219089Spjd ASSERT(!data->zcd_added); 4376219089Spjd 4377219089Spjd /* 4378219089Spjd * The private callback data should be destroyed here, but 4379219089Spjd * since we are going to check the zcd_called field after 4380219089Spjd * dmu_tx_abort(), we will destroy it there. 4381219089Spjd */ 4382219089Spjd return; 4383219089Spjd } 4384219089Spjd 4385219089Spjd /* Was this callback added to the global callback list? 
*/ 4386219089Spjd if (!data->zcd_added) 4387219089Spjd goto out; 4388219089Spjd 4389219089Spjd ASSERT3U(data->zcd_txg, !=, 0); 4390219089Spjd 4391219089Spjd /* Remove our callback from the list */ 4392219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4393219089Spjd list_remove(&zcl.zcl_callbacks, data); 4394219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4395219089Spjd 4396219089Spjdout: 4397219089Spjd umem_free(data, sizeof (ztest_cb_data_t)); 4398219089Spjd} 4399219089Spjd 4400219089Spjd/* Allocate and initialize callback data structure */ 4401219089Spjdstatic ztest_cb_data_t * 4402219089Spjdztest_create_cb_data(objset_t *os, uint64_t txg) 4403219089Spjd{ 4404219089Spjd ztest_cb_data_t *cb_data; 4405219089Spjd 4406219089Spjd cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); 4407219089Spjd 4408219089Spjd cb_data->zcd_txg = txg; 4409219089Spjd cb_data->zcd_spa = dmu_objset_spa(os); 4410219089Spjd 4411219089Spjd return (cb_data); 4412219089Spjd} 4413219089Spjd 4414219089Spjd/* 4415219089Spjd * If a number of txgs equal to this threshold have been created after a commit 4416219089Spjd * callback has been registered but not called, then we assume there is an 4417219089Spjd * implementation bug. 4418219089Spjd */ 4419219089Spjd#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2) 4420219089Spjd 4421219089Spjd/* 4422219089Spjd * Commit callback test. 
4423219089Spjd */ 4424168404Spjdvoid 4425219089Spjdztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) 4426168404Spjd{ 4427219089Spjd objset_t *os = zd->zd_os; 4428219089Spjd ztest_od_t od[1]; 4429219089Spjd dmu_tx_t *tx; 4430219089Spjd ztest_cb_data_t *cb_data[3], *tmp_cb; 4431219089Spjd uint64_t old_txg, txg; 4432219089Spjd int i, error; 4433219089Spjd 4434219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); 4435219089Spjd 4436219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4437219089Spjd return; 4438219089Spjd 4439219089Spjd tx = dmu_tx_create(os); 4440219089Spjd 4441219089Spjd cb_data[0] = ztest_create_cb_data(os, 0); 4442219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); 4443219089Spjd 4444219089Spjd dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t)); 4445219089Spjd 4446219089Spjd /* Every once in a while, abort the transaction on purpose */ 4447219089Spjd if (ztest_random(100) == 0) 4448219089Spjd error = -1; 4449219089Spjd 4450219089Spjd if (!error) 4451219089Spjd error = dmu_tx_assign(tx, TXG_NOWAIT); 4452219089Spjd 4453219089Spjd txg = error ? 0 : dmu_tx_get_txg(tx); 4454219089Spjd 4455219089Spjd cb_data[0]->zcd_txg = txg; 4456219089Spjd cb_data[1] = ztest_create_cb_data(os, txg); 4457219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); 4458219089Spjd 4459219089Spjd if (error) { 4460219089Spjd /* 4461219089Spjd * It's not a strict requirement to call the registered 4462219089Spjd * callbacks from inside dmu_tx_abort(), but that's what 4463219089Spjd * it's supposed to happen in the current implementation 4464219089Spjd * so we will check for that. 
4465219089Spjd */ 4466219089Spjd for (i = 0; i < 2; i++) { 4467219089Spjd cb_data[i]->zcd_expected_err = ECANCELED; 4468219089Spjd VERIFY(!cb_data[i]->zcd_called); 4469219089Spjd } 4470219089Spjd 4471219089Spjd dmu_tx_abort(tx); 4472219089Spjd 4473219089Spjd for (i = 0; i < 2; i++) { 4474219089Spjd VERIFY(cb_data[i]->zcd_called); 4475219089Spjd umem_free(cb_data[i], sizeof (ztest_cb_data_t)); 4476219089Spjd } 4477219089Spjd 4478219089Spjd return; 4479219089Spjd } 4480219089Spjd 4481219089Spjd cb_data[2] = ztest_create_cb_data(os, txg); 4482219089Spjd dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); 4483219089Spjd 4484219089Spjd /* 4485219089Spjd * Read existing data to make sure there isn't a future leak. 4486219089Spjd */ 4487219089Spjd VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t), 4488219089Spjd &old_txg, DMU_READ_PREFETCH)); 4489219089Spjd 4490219089Spjd if (old_txg > txg) 4491219089Spjd fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, 4492219089Spjd old_txg, txg); 4493219089Spjd 4494219089Spjd dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx); 4495219089Spjd 4496219089Spjd (void) mutex_lock(&zcl.zcl_callbacks_lock); 4497219089Spjd 4498219089Spjd /* 4499219089Spjd * Since commit callbacks don't have any ordering requirement and since 4500219089Spjd * it is theoretically possible for a commit callback to be called 4501219089Spjd * after an arbitrary amount of time has elapsed since its txg has been 4502219089Spjd * synced, it is difficult to reliably determine whether a commit 4503219089Spjd * callback hasn't been called due to high load or due to a flawed 4504219089Spjd * implementation. 4505219089Spjd * 4506219089Spjd * In practice, we will assume that if after a certain number of txgs a 4507219089Spjd * commit callback hasn't been called, then most likely there's an 4508219089Spjd * implementation bug.. 
4509219089Spjd */ 4510219089Spjd tmp_cb = list_head(&zcl.zcl_callbacks); 4511219089Spjd if (tmp_cb != NULL && 4512251635Sdelphij (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) { 4513219089Spjd fatal(0, "Commit callback threshold exceeded, oldest txg: %" 4514219089Spjd PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); 4515219089Spjd } 4516219089Spjd 4517219089Spjd /* 4518219089Spjd * Let's find the place to insert our callbacks. 4519219089Spjd * 4520219089Spjd * Even though the list is ordered by txg, it is possible for the 4521219089Spjd * insertion point to not be the end because our txg may already be 4522219089Spjd * quiescing at this point and other callbacks in the open txg 4523219089Spjd * (from other objsets) may have sneaked in. 4524219089Spjd */ 4525219089Spjd tmp_cb = list_tail(&zcl.zcl_callbacks); 4526219089Spjd while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) 4527219089Spjd tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); 4528219089Spjd 4529219089Spjd /* Add the 3 callbacks to the list */ 4530219089Spjd for (i = 0; i < 3; i++) { 4531219089Spjd if (tmp_cb == NULL) 4532219089Spjd list_insert_head(&zcl.zcl_callbacks, cb_data[i]); 4533219089Spjd else 4534219089Spjd list_insert_after(&zcl.zcl_callbacks, tmp_cb, 4535219089Spjd cb_data[i]); 4536219089Spjd 4537219089Spjd cb_data[i]->zcd_added = B_TRUE; 4538219089Spjd VERIFY(!cb_data[i]->zcd_called); 4539219089Spjd 4540219089Spjd tmp_cb = cb_data[i]; 4541219089Spjd } 4542219089Spjd 4543219089Spjd (void) mutex_unlock(&zcl.zcl_callbacks_lock); 4544219089Spjd 4545219089Spjd dmu_tx_commit(tx); 4546219089Spjd} 4547219089Spjd 4548219089Spjd/* ARGSUSED */ 4549219089Spjdvoid 4550219089Spjdztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) 4551219089Spjd{ 4552219089Spjd zfs_prop_t proplist[] = { 4553219089Spjd ZFS_PROP_CHECKSUM, 4554219089Spjd ZFS_PROP_COMPRESSION, 4555219089Spjd ZFS_PROP_COPIES, 4556219089Spjd ZFS_PROP_DEDUP 4557219089Spjd }; 4558219089Spjd 4559236143Smm (void) 
rw_rdlock(&ztest_name_lock); 4560219089Spjd 4561219089Spjd for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) 4562219089Spjd (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], 4563219089Spjd ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); 4564219089Spjd 4565236143Smm (void) rw_unlock(&ztest_name_lock); 4566219089Spjd} 4567219089Spjd 4568219089Spjd/* ARGSUSED */ 4569219089Spjdvoid 4570219089Spjdztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) 4571219089Spjd{ 4572219089Spjd nvlist_t *props = NULL; 4573219089Spjd 4574236143Smm (void) rw_rdlock(&ztest_name_lock); 4575219089Spjd 4576236143Smm (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, 4577219089Spjd ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); 4578219089Spjd 4579240415Smm VERIFY0(spa_prop_get(ztest_spa, &props)); 4580219089Spjd 4581236143Smm if (ztest_opts.zo_verbose >= 6) 4582219089Spjd dump_nvlist(props, 4); 4583219089Spjd 4584219089Spjd nvlist_free(props); 4585219089Spjd 4586236143Smm (void) rw_unlock(&ztest_name_lock); 4587219089Spjd} 4588219089Spjd 4589248571Smmstatic int 4590248571Smmuser_release_one(const char *snapname, const char *holdname) 4591248571Smm{ 4592248571Smm nvlist_t *snaps, *holds; 4593248571Smm int error; 4594248571Smm 4595248571Smm snaps = fnvlist_alloc(); 4596248571Smm holds = fnvlist_alloc(); 4597248571Smm fnvlist_add_boolean(holds, holdname); 4598248571Smm fnvlist_add_nvlist(snaps, snapname, holds); 4599248571Smm fnvlist_free(holds); 4600248571Smm error = dsl_dataset_user_release(snaps, NULL); 4601248571Smm fnvlist_free(snaps); 4602248571Smm return (error); 4603248571Smm} 4604248571Smm 4605219089Spjd/* 4606219089Spjd * Test snapshot hold/release and deferred destroy. 
4607219089Spjd */ 4608219089Spjdvoid 4609219089Spjdztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) 4610219089Spjd{ 4611219089Spjd int error; 4612219089Spjd objset_t *os = zd->zd_os; 4613219089Spjd objset_t *origin; 4614219089Spjd char snapname[100]; 4615219089Spjd char fullname[100]; 4616219089Spjd char clonename[100]; 4617219089Spjd char tag[100]; 4618168404Spjd char osname[MAXNAMELEN]; 4619248571Smm nvlist_t *holds; 4620168404Spjd 4621236143Smm (void) rw_rdlock(&ztest_name_lock); 4622168404Spjd 4623168404Spjd dmu_objset_name(os, osname); 4624168404Spjd 4625248571Smm (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id); 4626248571Smm (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); 4627248571Smm (void) snprintf(clonename, sizeof (clonename), 4628248571Smm "%s/ch1_%llu", osname, id); 4629248571Smm (void) snprintf(tag, sizeof (tag), "tag_%llu", id); 4630219089Spjd 4631219089Spjd /* 4632219089Spjd * Clean up from any previous run. 4633219089Spjd */ 4634248571Smm error = dsl_destroy_head(clonename); 4635248571Smm if (error != ENOENT) 4636248571Smm ASSERT0(error); 4637248571Smm error = user_release_one(fullname, tag); 4638248571Smm if (error != ESRCH && error != ENOENT) 4639248571Smm ASSERT0(error); 4640248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4641248571Smm if (error != ENOENT) 4642248571Smm ASSERT0(error); 4643219089Spjd 4644219089Spjd /* 4645219089Spjd * Create snapshot, clone it, mark snap for deferred destroy, 4646219089Spjd * destroy clone, verify snap was also destroyed. 
4647219089Spjd */ 4648248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4649219089Spjd if (error) { 4650219089Spjd if (error == ENOSPC) { 4651219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4652219089Spjd goto out; 4653168404Spjd } 4654219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4655219089Spjd } 4656168404Spjd 4657248571Smm error = dmu_objset_clone(clonename, fullname); 4658219089Spjd if (error) { 4659168404Spjd if (error == ENOSPC) { 4660219089Spjd ztest_record_enospc("dmu_objset_clone"); 4661219089Spjd goto out; 4662168404Spjd } 4663219089Spjd fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); 4664219089Spjd } 4665168404Spjd 4666248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4667219089Spjd if (error) { 4668248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4669219089Spjd fullname, error); 4670219089Spjd } 4671168404Spjd 4672248571Smm error = dsl_destroy_head(clonename); 4673219089Spjd if (error) 4674248571Smm fatal(0, "dsl_destroy_head(%s) = %d", clonename, error); 4675168404Spjd 4676219089Spjd error = dmu_objset_hold(fullname, FTAG, &origin); 4677219089Spjd if (error != ENOENT) 4678219089Spjd fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); 4679168404Spjd 4680219089Spjd /* 4681219089Spjd * Create snapshot, add temporary hold, verify that we can't 4682219089Spjd * destroy a held snapshot, mark for deferred destroy, 4683219089Spjd * release hold, verify snapshot was destroyed. 
4684219089Spjd */ 4685248571Smm error = dmu_objset_snapshot_one(osname, snapname); 4686219089Spjd if (error) { 4687219089Spjd if (error == ENOSPC) { 4688219089Spjd ztest_record_enospc("dmu_objset_snapshot"); 4689219089Spjd goto out; 4690168404Spjd } 4691219089Spjd fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); 4692168404Spjd } 4693168404Spjd 4694248571Smm holds = fnvlist_alloc(); 4695248571Smm fnvlist_add_string(holds, fullname, tag); 4696248571Smm error = dsl_dataset_user_hold(holds, 0, NULL); 4697248571Smm fnvlist_free(holds); 4698248571Smm 4699219089Spjd if (error) 4700219089Spjd fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); 4701219089Spjd 4702248571Smm error = dsl_destroy_snapshot(fullname, B_FALSE); 4703219089Spjd if (error != EBUSY) { 4704248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d", 4705219089Spjd fullname, error); 4706219089Spjd } 4707219089Spjd 4708248571Smm error = dsl_destroy_snapshot(fullname, B_TRUE); 4709219089Spjd if (error) { 4710248571Smm fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d", 4711219089Spjd fullname, error); 4712219089Spjd } 4713219089Spjd 4714248571Smm error = user_release_one(fullname, tag); 4715219089Spjd if (error) 4716248571Smm fatal(0, "user_release_one(%s)", fullname, tag); 4717219089Spjd 4718248571Smm VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); 4719219089Spjd 4720219089Spjdout: 4721236143Smm (void) rw_unlock(&ztest_name_lock); 4722168404Spjd} 4723168404Spjd 4724168404Spjd/* 4725168404Spjd * Inject random faults into the on-disk data. 
4726168404Spjd */ 4727219089Spjd/* ARGSUSED */ 4728168404Spjdvoid 4729219089Spjdztest_fault_inject(ztest_ds_t *zd, uint64_t id) 4730168404Spjd{ 4731219089Spjd ztest_shared_t *zs = ztest_shared; 4732236143Smm spa_t *spa = ztest_spa; 4733168404Spjd int fd; 4734168404Spjd uint64_t offset; 4735219089Spjd uint64_t leaves; 4736168404Spjd uint64_t bad = 0x1990c0ffeedecadeULL; 4737168404Spjd uint64_t top, leaf; 4738168404Spjd char path0[MAXPATHLEN]; 4739168404Spjd char pathrand[MAXPATHLEN]; 4740168404Spjd size_t fsize; 4741168404Spjd int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ 4742168404Spjd int iters = 1000; 4743219089Spjd int maxfaults; 4744219089Spjd int mirror_save; 4745185029Spjd vdev_t *vd0 = NULL; 4746168404Spjd uint64_t guid0 = 0; 4747219089Spjd boolean_t islog = B_FALSE; 4748168404Spjd 4749236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4750219089Spjd maxfaults = MAXFAULTS(); 4751236143Smm leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; 4752219089Spjd mirror_save = zs->zs_mirrors; 4753236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4754219089Spjd 4755185029Spjd ASSERT(leaves >= 1); 4756168404Spjd 4757168404Spjd /* 4758185029Spjd * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. 4759168404Spjd */ 4760185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4761168404Spjd 4762185029Spjd if (ztest_random(2) == 0) { 4763185029Spjd /* 4764219089Spjd * Inject errors on a normal data device or slog device. 4765185029Spjd */ 4766219089Spjd top = ztest_random_vdev_top(spa, B_TRUE); 4767219089Spjd leaf = ztest_random(leaves) + zs->zs_splits; 4768168404Spjd 4769185029Spjd /* 4770185029Spjd * Generate paths to the first leaf in this top-level vdev, 4771185029Spjd * and to the random leaf we selected. We'll induce transient 4772185029Spjd * write failures and random online/offline activity on leaf 0, 4773185029Spjd * and we'll write random garbage to the randomly chosen leaf. 
4774185029Spjd */ 4775185029Spjd (void) snprintf(path0, sizeof (path0), ztest_dev_template, 4776236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4777236143Smm top * leaves + zs->zs_splits); 4778185029Spjd (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template, 4779236143Smm ztest_opts.zo_dir, ztest_opts.zo_pool, 4780236143Smm top * leaves + leaf); 4781168404Spjd 4782185029Spjd vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); 4783219089Spjd if (vd0 != NULL && vd0->vdev_top->vdev_islog) 4784219089Spjd islog = B_TRUE; 4785219089Spjd 4786185029Spjd if (vd0 != NULL && maxfaults != 1) { 4787185029Spjd /* 4788185029Spjd * Make vd0 explicitly claim to be unreadable, 4789185029Spjd * or unwriteable, or reach behind its back 4790185029Spjd * and close the underlying fd. We can do this if 4791185029Spjd * maxfaults == 0 because we'll fail and reexecute, 4792185029Spjd * and we can do it if maxfaults >= 2 because we'll 4793185029Spjd * have enough redundancy. If maxfaults == 1, the 4794185029Spjd * combination of this with injection of random data 4795185029Spjd * corruption below exceeds the pool's fault tolerance. 4796185029Spjd */ 4797185029Spjd vdev_file_t *vf = vd0->vdev_tsd; 4798168404Spjd 4799185029Spjd if (vf != NULL && ztest_random(3) == 0) { 4800185029Spjd (void) close(vf->vf_vnode->v_fd); 4801185029Spjd vf->vf_vnode->v_fd = -1; 4802185029Spjd } else if (ztest_random(2) == 0) { 4803185029Spjd vd0->vdev_cant_read = B_TRUE; 4804185029Spjd } else { 4805185029Spjd vd0->vdev_cant_write = B_TRUE; 4806185029Spjd } 4807185029Spjd guid0 = vd0->vdev_guid; 4808185029Spjd } 4809185029Spjd } else { 4810185029Spjd /* 4811185029Spjd * Inject errors on an l2cache device. 
4812185029Spjd */ 4813185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 4814168404Spjd 4815185029Spjd if (sav->sav_count == 0) { 4816185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4817185029Spjd return; 4818185029Spjd } 4819185029Spjd vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; 4820168404Spjd guid0 = vd0->vdev_guid; 4821185029Spjd (void) strcpy(path0, vd0->vdev_path); 4822185029Spjd (void) strcpy(pathrand, vd0->vdev_path); 4823185029Spjd 4824185029Spjd leaf = 0; 4825185029Spjd leaves = 1; 4826185029Spjd maxfaults = INT_MAX; /* no limit on cache devices */ 4827168404Spjd } 4828168404Spjd 4829185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4830185029Spjd 4831168404Spjd /* 4832219089Spjd * If we can tolerate two or more faults, or we're dealing 4833219089Spjd * with a slog, randomly online/offline vd0. 4834168404Spjd */ 4835219089Spjd if ((maxfaults >= 2 || islog) && guid0 != 0) { 4836209962Smm if (ztest_random(10) < 6) { 4837209962Smm int flags = (ztest_random(2) == 0 ? 4838209962Smm ZFS_OFFLINE_TEMPORARY : 0); 4839219089Spjd 4840219089Spjd /* 4841219089Spjd * We have to grab the zs_name_lock as writer to 4842219089Spjd * prevent a race between offlining a slog and 4843219089Spjd * destroying a dataset. Offlining the slog will 4844219089Spjd * grab a reference on the dataset which may cause 4845219089Spjd * dmu_objset_destroy() to fail with EBUSY thus 4846219089Spjd * leaving the dataset in an inconsistent state. 
4847219089Spjd */ 4848219089Spjd if (islog) 4849236143Smm (void) rw_wrlock(&ztest_name_lock); 4850219089Spjd 4851209962Smm VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); 4852219089Spjd 4853219089Spjd if (islog) 4854236143Smm (void) rw_unlock(&ztest_name_lock); 4855209962Smm } else { 4856242845Sdelphij /* 4857242845Sdelphij * Ideally we would like to be able to randomly 4858242845Sdelphij * call vdev_[on|off]line without holding locks 4859242845Sdelphij * to force unpredictable failures but the side 4860242845Sdelphij * effects of vdev_[on|off]line prevent us from 4861242845Sdelphij * doing so. We grab the ztest_vdev_lock here to 4862242845Sdelphij * prevent a race between injection testing and 4863242845Sdelphij * aux_vdev removal. 4864242845Sdelphij */ 4865242845Sdelphij VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4866209962Smm (void) vdev_online(spa, guid0, 0, NULL); 4867242845Sdelphij VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4868209962Smm } 4869168404Spjd } 4870168404Spjd 4871219089Spjd if (maxfaults == 0) 4872219089Spjd return; 4873219089Spjd 4874168404Spjd /* 4875168404Spjd * We have at least single-fault tolerance, so inject data corruption. 
4876168404Spjd */ 4877168404Spjd fd = open(pathrand, O_RDWR); 4878168404Spjd 4879168404Spjd if (fd == -1) /* we hit a gap in the device namespace */ 4880168404Spjd return; 4881168404Spjd 4882168404Spjd fsize = lseek(fd, 0, SEEK_END); 4883168404Spjd 4884168404Spjd while (--iters != 0) { 4885168404Spjd offset = ztest_random(fsize / (leaves << bshift)) * 4886168404Spjd (leaves << bshift) + (leaf << bshift) + 4887168404Spjd (ztest_random(1ULL << (bshift - 1)) & -8ULL); 4888168404Spjd 4889168404Spjd if (offset >= fsize) 4890168404Spjd continue; 4891168404Spjd 4892236143Smm VERIFY(mutex_lock(&ztest_vdev_lock) == 0); 4893219089Spjd if (mirror_save != zs->zs_mirrors) { 4894236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4895219089Spjd (void) close(fd); 4896219089Spjd return; 4897219089Spjd } 4898168404Spjd 4899168404Spjd if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) 4900168404Spjd fatal(1, "can't inject bad word at 0x%llx in %s", 4901168404Spjd offset, pathrand); 4902219089Spjd 4903236143Smm VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); 4904219089Spjd 4905236143Smm if (ztest_opts.zo_verbose >= 7) 4906219089Spjd (void) printf("injected bad word into %s," 4907219089Spjd " offset 0x%llx\n", pathrand, (u_longlong_t)offset); 4908168404Spjd } 4909168404Spjd 4910168404Spjd (void) close(fd); 4911168404Spjd} 4912168404Spjd 4913168404Spjd/* 4914219089Spjd * Verify that DDT repair works as expected. 
4915219089Spjd */ 4916219089Spjdvoid 4917219089Spjdztest_ddt_repair(ztest_ds_t *zd, uint64_t id) 4918219089Spjd{ 4919219089Spjd ztest_shared_t *zs = ztest_shared; 4920236143Smm spa_t *spa = ztest_spa; 4921219089Spjd objset_t *os = zd->zd_os; 4922219089Spjd ztest_od_t od[1]; 4923219089Spjd uint64_t object, blocksize, txg, pattern, psize; 4924219089Spjd enum zio_checksum checksum = spa_dedup_checksum(spa); 4925219089Spjd dmu_buf_t *db; 4926219089Spjd dmu_tx_t *tx; 4927219089Spjd void *buf; 4928219089Spjd blkptr_t blk; 4929219089Spjd int copies = 2 * ZIO_DEDUPDITTO_MIN; 4930219089Spjd 4931219089Spjd blocksize = ztest_random_blocksize(); 4932219089Spjd blocksize = MIN(blocksize, 2048); /* because we write so many */ 4933219089Spjd 4934219089Spjd ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); 4935219089Spjd 4936219089Spjd if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0) 4937219089Spjd return; 4938219089Spjd 4939219089Spjd /* 4940219089Spjd * Take the name lock as writer to prevent anyone else from changing 4941219089Spjd * the pool and dataset properies we need to maintain during this test. 
4942219089Spjd */ 4943236143Smm (void) rw_wrlock(&ztest_name_lock); 4944219089Spjd 4945219089Spjd if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, 4946219089Spjd B_FALSE) != 0 || 4947219089Spjd ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, 4948219089Spjd B_FALSE) != 0) { 4949236143Smm (void) rw_unlock(&ztest_name_lock); 4950219089Spjd return; 4951219089Spjd } 4952219089Spjd 4953219089Spjd object = od[0].od_object; 4954219089Spjd blocksize = od[0].od_blocksize; 4955228103Smm pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); 4956219089Spjd 4957219089Spjd ASSERT(object != 0); 4958219089Spjd 4959219089Spjd tx = dmu_tx_create(os); 4960219089Spjd dmu_tx_hold_write(tx, object, 0, copies * blocksize); 4961219089Spjd txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); 4962219089Spjd if (txg == 0) { 4963236143Smm (void) rw_unlock(&ztest_name_lock); 4964219089Spjd return; 4965219089Spjd } 4966219089Spjd 4967219089Spjd /* 4968219089Spjd * Write all the copies of our block. 4969219089Spjd */ 4970219089Spjd for (int i = 0; i < copies; i++) { 4971219089Spjd uint64_t offset = i * blocksize; 4972248571Smm int error = dmu_buf_hold(os, object, offset, FTAG, &db, 4973248571Smm DMU_READ_NO_PREFETCH); 4974248571Smm if (error != 0) { 4975248571Smm fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u", 4976248571Smm os, (long long)object, (long long) offset, error); 4977248571Smm } 4978219089Spjd ASSERT(db->db_offset == offset); 4979219089Spjd ASSERT(db->db_size == blocksize); 4980219089Spjd ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || 4981219089Spjd ztest_pattern_match(db->db_data, db->db_size, 0ULL)); 4982219089Spjd dmu_buf_will_fill(db, tx); 4983219089Spjd ztest_pattern_set(db->db_data, db->db_size, pattern); 4984219089Spjd dmu_buf_rele(db, FTAG); 4985219089Spjd } 4986219089Spjd 4987219089Spjd dmu_tx_commit(tx); 4988219089Spjd txg_wait_synced(spa_get_dsl(spa), txg); 4989219089Spjd 4990219089Spjd /* 4991219089Spjd * Find out what block we got. 
4992219089Spjd */ 4993243524Smm VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db, 4994243524Smm DMU_READ_NO_PREFETCH)); 4995219089Spjd blk = *((dmu_buf_impl_t *)db)->db_blkptr; 4996219089Spjd dmu_buf_rele(db, FTAG); 4997219089Spjd 4998219089Spjd /* 4999219089Spjd * Damage the block. Dedup-ditto will save us when we read it later. 5000219089Spjd */ 5001219089Spjd psize = BP_GET_PSIZE(&blk); 5002219089Spjd buf = zio_buf_alloc(psize); 5003219089Spjd ztest_pattern_set(buf, psize, ~pattern); 5004219089Spjd 5005219089Spjd (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, 5006219089Spjd buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, 5007219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); 5008219089Spjd 5009219089Spjd zio_buf_free(buf, psize); 5010219089Spjd 5011236143Smm (void) rw_unlock(&ztest_name_lock); 5012219089Spjd} 5013219089Spjd 5014219089Spjd/* 5015168404Spjd * Scrub the pool. 5016168404Spjd */ 5017219089Spjd/* ARGSUSED */ 5018168404Spjdvoid 5019219089Spjdztest_scrub(ztest_ds_t *zd, uint64_t id) 5020168404Spjd{ 5021236143Smm spa_t *spa = ztest_spa; 5022168404Spjd 5023219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5024219089Spjd (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ 5025219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5026168404Spjd} 5027168404Spjd 5028168404Spjd/* 5029228103Smm * Change the guid for the pool. 
5030228103Smm */ 5031228103Smm/* ARGSUSED */ 5032228103Smmvoid 5033228103Smmztest_reguid(ztest_ds_t *zd, uint64_t id) 5034228103Smm{ 5035236143Smm spa_t *spa = ztest_spa; 5036228103Smm uint64_t orig, load; 5037239620Smm int error; 5038228103Smm 5039228103Smm orig = spa_guid(spa); 5040228103Smm load = spa_load_guid(spa); 5041239620Smm 5042239620Smm (void) rw_wrlock(&ztest_name_lock); 5043239620Smm error = spa_change_guid(spa); 5044239620Smm (void) rw_unlock(&ztest_name_lock); 5045239620Smm 5046239620Smm if (error != 0) 5047228103Smm return; 5048228103Smm 5049243505Smm if (ztest_opts.zo_verbose >= 4) { 5050228103Smm (void) printf("Changed guid old %llu -> %llu\n", 5051228103Smm (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); 5052228103Smm } 5053228103Smm 5054228103Smm VERIFY3U(orig, !=, spa_guid(spa)); 5055228103Smm VERIFY3U(load, ==, spa_load_guid(spa)); 5056228103Smm} 5057228103Smm 5058228103Smm/* 5059168404Spjd * Rename the pool to a different name and then rename it back. 5060168404Spjd */ 5061219089Spjd/* ARGSUSED */ 5062168404Spjdvoid 5063219089Spjdztest_spa_rename(ztest_ds_t *zd, uint64_t id) 5064168404Spjd{ 5065168404Spjd char *oldname, *newname; 5066168404Spjd spa_t *spa; 5067168404Spjd 5068236143Smm (void) rw_wrlock(&ztest_name_lock); 5069168404Spjd 5070236143Smm oldname = ztest_opts.zo_pool; 5071168404Spjd newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); 5072168404Spjd (void) strcpy(newname, oldname); 5073168404Spjd (void) strcat(newname, "_tmp"); 5074168404Spjd 5075168404Spjd /* 5076168404Spjd * Do the rename 5077168404Spjd */ 5078219089Spjd VERIFY3U(0, ==, spa_rename(oldname, newname)); 5079168404Spjd 5080168404Spjd /* 5081168404Spjd * Try to open it under the old name, which shouldn't exist 5082168404Spjd */ 5083219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5084168404Spjd 5085168404Spjd /* 5086168404Spjd * Open it under the new name and make sure it's still the same spa_t. 
5087168404Spjd */ 5088219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5089168404Spjd 5090236143Smm ASSERT(spa == ztest_spa); 5091168404Spjd spa_close(spa, FTAG); 5092168404Spjd 5093168404Spjd /* 5094168404Spjd * Rename it back to the original 5095168404Spjd */ 5096219089Spjd VERIFY3U(0, ==, spa_rename(newname, oldname)); 5097168404Spjd 5098168404Spjd /* 5099168404Spjd * Make sure it can still be opened 5100168404Spjd */ 5101219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5102168404Spjd 5103236143Smm ASSERT(spa == ztest_spa); 5104168404Spjd spa_close(spa, FTAG); 5105168404Spjd 5106168404Spjd umem_free(newname, strlen(newname) + 1); 5107168404Spjd 5108236143Smm (void) rw_unlock(&ztest_name_lock); 5109168404Spjd} 5110168404Spjd 5111168404Spjd/* 5112219089Spjd * Verify pool integrity by running zdb. 5113168404Spjd */ 5114168404Spjdstatic void 5115219089Spjdztest_run_zdb(char *pool) 5116168404Spjd{ 5117168404Spjd int status; 5118168404Spjd char zdb[MAXPATHLEN + MAXNAMELEN + 20]; 5119168404Spjd char zbuf[1024]; 5120168404Spjd char *bin; 5121185029Spjd char *ztest; 5122185029Spjd char *isa; 5123185029Spjd int isalen; 5124168404Spjd FILE *fp; 5125168404Spjd 5126214623Spjd strlcpy(zdb, "/usr/bin/ztest", sizeof(zdb)); 5127168404Spjd 5128168404Spjd /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */ 5129168404Spjd bin = strstr(zdb, "/usr/bin/"); 5130185029Spjd ztest = strstr(bin, "/ztest"); 5131185029Spjd isa = bin + 8; 5132185029Spjd isalen = ztest - isa; 5133185029Spjd isa = strdup(isa); 5134168404Spjd /* LINTED */ 5135185029Spjd (void) sprintf(bin, 5136219089Spjd "/usr/sbin%.*s/zdb -bcc%s%s -U %s %s", 5137185029Spjd isalen, 5138185029Spjd isa, 5139236143Smm ztest_opts.zo_verbose >= 3 ? "s" : "", 5140236143Smm ztest_opts.zo_verbose >= 4 ? 
"v" : "", 5141219089Spjd spa_config_path, 5142208047Smm pool); 5143185029Spjd free(isa); 5144168404Spjd 5145236143Smm if (ztest_opts.zo_verbose >= 5) 5146168404Spjd (void) printf("Executing %s\n", strstr(zdb, "zdb ")); 5147168404Spjd 5148168404Spjd fp = popen(zdb, "r"); 5149168404Spjd assert(fp != NULL); 5150168404Spjd 5151168404Spjd while (fgets(zbuf, sizeof (zbuf), fp) != NULL) 5152236143Smm if (ztest_opts.zo_verbose >= 3) 5153168404Spjd (void) printf("%s", zbuf); 5154168404Spjd 5155168404Spjd status = pclose(fp); 5156168404Spjd 5157168404Spjd if (status == 0) 5158168404Spjd return; 5159168404Spjd 5160168404Spjd ztest_dump_core = 0; 5161168404Spjd if (WIFEXITED(status)) 5162168404Spjd fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); 5163168404Spjd else 5164168404Spjd fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); 5165168404Spjd} 5166168404Spjd 5167168404Spjdstatic void 5168168404Spjdztest_walk_pool_directory(char *header) 5169168404Spjd{ 5170168404Spjd spa_t *spa = NULL; 5171168404Spjd 5172236143Smm if (ztest_opts.zo_verbose >= 6) 5173168404Spjd (void) printf("%s\n", header); 5174168404Spjd 5175168404Spjd mutex_enter(&spa_namespace_lock); 5176168404Spjd while ((spa = spa_next(spa)) != NULL) 5177236143Smm if (ztest_opts.zo_verbose >= 6) 5178168404Spjd (void) printf("\t%s\n", spa_name(spa)); 5179168404Spjd mutex_exit(&spa_namespace_lock); 5180168404Spjd} 5181168404Spjd 5182168404Spjdstatic void 5183168404Spjdztest_spa_import_export(char *oldname, char *newname) 5184168404Spjd{ 5185209962Smm nvlist_t *config, *newconfig; 5186168404Spjd uint64_t pool_guid; 5187168404Spjd spa_t *spa; 5188248571Smm int error; 5189168404Spjd 5190236143Smm if (ztest_opts.zo_verbose >= 4) { 5191168404Spjd (void) printf("import/export: old = %s, new = %s\n", 5192168404Spjd oldname, newname); 5193168404Spjd } 5194168404Spjd 5195168404Spjd /* 5196168404Spjd * Clean up from previous runs. 
5197168404Spjd */ 5198168404Spjd (void) spa_destroy(newname); 5199168404Spjd 5200168404Spjd /* 5201168404Spjd * Get the pool's configuration and guid. 5202168404Spjd */ 5203219089Spjd VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); 5204168404Spjd 5205209962Smm /* 5206209962Smm * Kick off a scrub to tickle scrub/export races. 5207209962Smm */ 5208209962Smm if (ztest_random(2) == 0) 5209219089Spjd (void) spa_scan(spa, POOL_SCAN_SCRUB); 5210209962Smm 5211168404Spjd pool_guid = spa_guid(spa); 5212168404Spjd spa_close(spa, FTAG); 5213168404Spjd 5214168404Spjd ztest_walk_pool_directory("pools before export"); 5215168404Spjd 5216168404Spjd /* 5217168404Spjd * Export it. 5218168404Spjd */ 5219219089Spjd VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); 5220168404Spjd 5221168404Spjd ztest_walk_pool_directory("pools after export"); 5222168404Spjd 5223168404Spjd /* 5224209962Smm * Try to import it. 5225209962Smm */ 5226209962Smm newconfig = spa_tryimport(config); 5227209962Smm ASSERT(newconfig != NULL); 5228209962Smm nvlist_free(newconfig); 5229209962Smm 5230209962Smm /* 5231168404Spjd * Import it under the new name. 5232168404Spjd */ 5233248571Smm error = spa_import(newname, config, NULL, 0); 5234248571Smm if (error != 0) { 5235248571Smm dump_nvlist(config, 0); 5236248571Smm fatal(B_FALSE, "couldn't import pool %s as %s: error %u", 5237248571Smm oldname, newname, error); 5238248571Smm } 5239168404Spjd 5240168404Spjd ztest_walk_pool_directory("pools after import"); 5241168404Spjd 5242168404Spjd /* 5243168404Spjd * Try to import it again -- should fail with EEXIST. 5244168404Spjd */ 5245219089Spjd VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); 5246168404Spjd 5247168404Spjd /* 5248168404Spjd * Try to import it under a different name -- should fail with EEXIST. 
5249168404Spjd */ 5250219089Spjd VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); 5251168404Spjd 5252168404Spjd /* 5253168404Spjd * Verify that the pool is no longer visible under the old name. 5254168404Spjd */ 5255219089Spjd VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); 5256168404Spjd 5257168404Spjd /* 5258168404Spjd * Verify that we can open and close the pool using the new name. 5259168404Spjd */ 5260219089Spjd VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); 5261168404Spjd ASSERT(pool_guid == spa_guid(spa)); 5262168404Spjd spa_close(spa, FTAG); 5263168404Spjd 5264168404Spjd nvlist_free(config); 5265168404Spjd} 5266168404Spjd 5267209962Smmstatic void 5268209962Smmztest_resume(spa_t *spa) 5269209962Smm{ 5270236143Smm if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) 5271219089Spjd (void) printf("resuming from suspended state\n"); 5272219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5273219089Spjd vdev_clear(spa, NULL); 5274219089Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5275219089Spjd (void) zio_resume(spa); 5276209962Smm} 5277209962Smm 5278168404Spjdstatic void * 5279209962Smmztest_resume_thread(void *arg) 5280185029Spjd{ 5281185029Spjd spa_t *spa = arg; 5282185029Spjd 5283185029Spjd while (!ztest_exiting) { 5284219089Spjd if (spa_suspended(spa)) 5285219089Spjd ztest_resume(spa); 5286219089Spjd (void) poll(NULL, 0, 100); 5287185029Spjd } 5288185029Spjd return (NULL); 5289185029Spjd} 5290185029Spjd 5291185029Spjdstatic void * 5292219089Spjdztest_deadman_thread(void *arg) 5293219089Spjd{ 5294219089Spjd ztest_shared_t *zs = arg; 5295219089Spjd int grace = 300; 5296219089Spjd hrtime_t delta; 5297219089Spjd 5298219089Spjd delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; 5299219089Spjd 5300219089Spjd (void) poll(NULL, 0, (int)(1000 * delta)); 5301219089Spjd 5302219089Spjd fatal(0, "failed to complete within %d seconds of deadline", grace); 5303219089Spjd 5304219089Spjd return (NULL); 5305219089Spjd} 5306219089Spjd 
5307219089Spjdstatic void 5308236143Smmztest_execute(int test, ztest_info_t *zi, uint64_t id) 5309219089Spjd{ 5310236143Smm ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; 5311236143Smm ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); 5312219089Spjd hrtime_t functime = gethrtime(); 5313219089Spjd 5314219089Spjd for (int i = 0; i < zi->zi_iters; i++) 5315219089Spjd zi->zi_func(zd, id); 5316219089Spjd 5317219089Spjd functime = gethrtime() - functime; 5318219089Spjd 5319236143Smm atomic_add_64(&zc->zc_count, 1); 5320236143Smm atomic_add_64(&zc->zc_time, functime); 5321219089Spjd 5322236143Smm if (ztest_opts.zo_verbose >= 4) { 5323219089Spjd Dl_info dli; 5324219089Spjd (void) dladdr((void *)zi->zi_func, &dli); 5325219089Spjd (void) printf("%6.2f sec in %s\n", 5326219089Spjd (double)functime / NANOSEC, dli.dli_sname); 5327219089Spjd } 5328219089Spjd} 5329219089Spjd 5330219089Spjdstatic void * 5331168404Spjdztest_thread(void *arg) 5332168404Spjd{ 5333236143Smm int rand; 5334219089Spjd uint64_t id = (uintptr_t)arg; 5335168404Spjd ztest_shared_t *zs = ztest_shared; 5336219089Spjd uint64_t call_next; 5337219089Spjd hrtime_t now; 5338168404Spjd ztest_info_t *zi; 5339236143Smm ztest_shared_callstate_t *zc; 5340168404Spjd 5341219089Spjd while ((now = gethrtime()) < zs->zs_thread_stop) { 5342168404Spjd /* 5343168404Spjd * See if it's time to force a crash. 5344168404Spjd */ 5345219089Spjd if (now > zs->zs_thread_kill) 5346219089Spjd ztest_kill(zs); 5347168404Spjd 5348168404Spjd /* 5349219089Spjd * If we're getting ENOSPC with some regularity, stop. 5350168404Spjd */ 5351219089Spjd if (zs->zs_enospc_count > 10) 5352219089Spjd break; 5353168404Spjd 5354168404Spjd /* 5355219089Spjd * Pick a random function to execute. 
5356168404Spjd */ 5357236143Smm rand = ztest_random(ZTEST_FUNCS); 5358236143Smm zi = &ztest_info[rand]; 5359236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(rand); 5360236143Smm call_next = zc->zc_next; 5361168404Spjd 5362219089Spjd if (now >= call_next && 5363236143Smm atomic_cas_64(&zc->zc_next, call_next, call_next + 5364236143Smm ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { 5365236143Smm ztest_execute(rand, zi, id); 5366236143Smm } 5367219089Spjd } 5368168404Spjd 5369219089Spjd return (NULL); 5370219089Spjd} 5371168404Spjd 5372219089Spjdstatic void 5373219089Spjdztest_dataset_name(char *dsname, char *pool, int d) 5374219089Spjd{ 5375219089Spjd (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); 5376219089Spjd} 5377168404Spjd 5378219089Spjdstatic void 5379236143Smmztest_dataset_destroy(int d) 5380219089Spjd{ 5381219089Spjd char name[MAXNAMELEN]; 5382168404Spjd 5383236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5384168404Spjd 5385236143Smm if (ztest_opts.zo_verbose >= 3) 5386219089Spjd (void) printf("Destroying %s to free up space\n", name); 5387168404Spjd 5388219089Spjd /* 5389219089Spjd * Cleanup any non-standard clones and snapshots. In general, 5390219089Spjd * ztest thread t operates on dataset (t % zopt_datasets), 5391219089Spjd * so there may be more than one thing to clean up. 5392219089Spjd */ 5393236143Smm for (int t = d; t < ztest_opts.zo_threads; 5394236143Smm t += ztest_opts.zo_datasets) { 5395219089Spjd ztest_dsl_dataset_cleanup(name, t); 5396236143Smm } 5397219089Spjd 5398219089Spjd (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, 5399219089Spjd DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); 5400219089Spjd} 5401219089Spjd 5402219089Spjdstatic void 5403219089Spjdztest_dataset_dirobj_verify(ztest_ds_t *zd) 5404219089Spjd{ 5405219089Spjd uint64_t usedobjs, dirobjs, scratch; 5406219089Spjd 5407219089Spjd /* 5408219089Spjd * ZTEST_DIROBJ is the object directory for the entire dataset. 
5409219089Spjd * Therefore, the number of objects in use should equal the 5410219089Spjd * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. 5411219089Spjd * If not, we have an object leak. 5412219089Spjd * 5413219089Spjd * Note that we can only check this in ztest_dataset_open(), 5414219089Spjd * when the open-context and syncing-context values agree. 5415219089Spjd * That's because zap_count() returns the open-context value, 5416219089Spjd * while dmu_objset_space() returns the rootbp fill count. 5417219089Spjd */ 5418219089Spjd VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); 5419219089Spjd dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); 5420219089Spjd ASSERT3U(dirobjs + 1, ==, usedobjs); 5421219089Spjd} 5422219089Spjd 5423219089Spjdstatic int 5424236143Smmztest_dataset_open(int d) 5425219089Spjd{ 5426236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5427236143Smm uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; 5428219089Spjd objset_t *os; 5429219089Spjd zilog_t *zilog; 5430219089Spjd char name[MAXNAMELEN]; 5431219089Spjd int error; 5432219089Spjd 5433236143Smm ztest_dataset_name(name, ztest_opts.zo_pool, d); 5434219089Spjd 5435236143Smm (void) rw_rdlock(&ztest_name_lock); 5436219089Spjd 5437219089Spjd error = ztest_dataset_create(name); 5438219089Spjd if (error == ENOSPC) { 5439236143Smm (void) rw_unlock(&ztest_name_lock); 5440219089Spjd ztest_record_enospc(FTAG); 5441219089Spjd return (error); 5442168404Spjd } 5443219089Spjd ASSERT(error == 0 || error == EEXIST); 5444168404Spjd 5445248571Smm VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); 5446236143Smm (void) rw_unlock(&ztest_name_lock); 5447219089Spjd 5448236143Smm ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); 5449219089Spjd 5450219089Spjd zilog = zd->zd_zilog; 5451219089Spjd 5452219089Spjd if (zilog->zl_header->zh_claim_lr_seq != 0 && 5453219089Spjd zilog->zl_header->zh_claim_lr_seq < committed_seq) 5454219089Spjd fatal(0, "missing log records: 
claimed %llu < committed %llu", 5455219089Spjd zilog->zl_header->zh_claim_lr_seq, committed_seq); 5456219089Spjd 5457219089Spjd ztest_dataset_dirobj_verify(zd); 5458219089Spjd 5459219089Spjd zil_replay(os, zd, ztest_replay_vector); 5460219089Spjd 5461219089Spjd ztest_dataset_dirobj_verify(zd); 5462219089Spjd 5463236143Smm if (ztest_opts.zo_verbose >= 6) 5464219089Spjd (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", 5465219089Spjd zd->zd_name, 5466219089Spjd (u_longlong_t)zilog->zl_parse_blk_count, 5467219089Spjd (u_longlong_t)zilog->zl_parse_lr_count, 5468219089Spjd (u_longlong_t)zilog->zl_replaying_seq); 5469219089Spjd 5470219089Spjd zilog = zil_open(os, ztest_get_data); 5471219089Spjd 5472219089Spjd if (zilog->zl_replaying_seq != 0 && 5473219089Spjd zilog->zl_replaying_seq < committed_seq) 5474219089Spjd fatal(0, "missing log records: replayed %llu < committed %llu", 5475219089Spjd zilog->zl_replaying_seq, committed_seq); 5476219089Spjd 5477219089Spjd return (0); 5478168404Spjd} 5479168404Spjd 5480219089Spjdstatic void 5481236143Smmztest_dataset_close(int d) 5482219089Spjd{ 5483236143Smm ztest_ds_t *zd = &ztest_ds[d]; 5484219089Spjd 5485219089Spjd zil_close(zd->zd_zilog); 5486248571Smm dmu_objset_disown(zd->zd_os, zd); 5487219089Spjd 5488219089Spjd ztest_zd_fini(zd); 5489219089Spjd} 5490219089Spjd 5491168404Spjd/* 5492168404Spjd * Kick off threads to run tests on all datasets in parallel. 5493168404Spjd */ 5494168404Spjdstatic void 5495219089Spjdztest_run(ztest_shared_t *zs) 5496168404Spjd{ 5497219089Spjd thread_t *tid; 5498168404Spjd spa_t *spa; 5499228103Smm objset_t *os; 5500185029Spjd thread_t resume_tid; 5501219089Spjd int error; 5502168404Spjd 5503185029Spjd ztest_exiting = B_FALSE; 5504185029Spjd 5505168404Spjd /* 5506219089Spjd * Initialize parent/child shared state. 
5507168404Spjd */ 5508236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5509236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5510168404Spjd 5511219089Spjd zs->zs_thread_start = gethrtime(); 5512236143Smm zs->zs_thread_stop = 5513236143Smm zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; 5514219089Spjd zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); 5515219089Spjd zs->zs_thread_kill = zs->zs_thread_stop; 5516236143Smm if (ztest_random(100) < ztest_opts.zo_killrate) { 5517236143Smm zs->zs_thread_kill -= 5518236143Smm ztest_random(ztest_opts.zo_passtime * NANOSEC); 5519236143Smm } 5520168404Spjd 5521219089Spjd (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL); 5522168404Spjd 5523219089Spjd list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), 5524219089Spjd offsetof(ztest_cb_data_t, zcd_node)); 5525168404Spjd 5526168404Spjd /* 5527219089Spjd * Open our pool. 5528168404Spjd */ 5529219089Spjd kernel_init(FREAD | FWRITE); 5530248571Smm VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5531224177Smm spa->spa_debug = B_TRUE; 5532236143Smm ztest_spa = spa; 5533168404Spjd 5534248571Smm VERIFY0(dmu_objset_own(ztest_opts.zo_pool, 5535248571Smm DMU_OST_ANY, B_TRUE, FTAG, &os)); 5536228103Smm zs->zs_guid = dmu_objset_fsid_guid(os); 5537248571Smm dmu_objset_disown(os, FTAG); 5538228103Smm 5539219089Spjd spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; 5540168404Spjd 5541168404Spjd /* 5542209962Smm * We don't expect the pool to suspend unless maxfaults == 0, 5543209962Smm * in which case ztest_fault_inject() temporarily takes away 5544209962Smm * the only valid replica. 5545209962Smm */ 5546219089Spjd if (MAXFAULTS() == 0) 5547209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; 5548209962Smm else 5549209962Smm spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; 5550209962Smm 5551209962Smm /* 5552185029Spjd * Create a thread to periodically resume suspended I/O. 
5553185029Spjd */ 5554209962Smm VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND, 5555185029Spjd &resume_tid) == 0); 5556185029Spjd 5557185029Spjd /* 5558219089Spjd * Create a deadman thread to abort() if we hang. 5559219089Spjd */ 5560219089Spjd VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, 5561219089Spjd NULL) == 0); 5562219089Spjd 5563219089Spjd /* 5564168404Spjd * Verify that we can safely inquire about about any object, 5565168404Spjd * whether it's allocated or not. To make it interesting, 5566168404Spjd * we probe a 5-wide window around each power of two. 5567168404Spjd * This hits all edge cases, including zero and the max. 5568168404Spjd */ 5569219089Spjd for (int t = 0; t < 64; t++) { 5570219089Spjd for (int d = -5; d <= 5; d++) { 5571168404Spjd error = dmu_object_info(spa->spa_meta_objset, 5572168404Spjd (1ULL << t) + d, NULL); 5573168404Spjd ASSERT(error == 0 || error == ENOENT || 5574168404Spjd error == EINVAL); 5575168404Spjd } 5576168404Spjd } 5577168404Spjd 5578168404Spjd /* 5579219089Spjd * If we got any ENOSPC errors on the previous run, destroy something. 5580168404Spjd */ 5581219089Spjd if (zs->zs_enospc_count != 0) { 5582236143Smm int d = ztest_random(ztest_opts.zo_datasets); 5583236143Smm ztest_dataset_destroy(d); 5584219089Spjd } 5585168404Spjd zs->zs_enospc_count = 0; 5586168404Spjd 5587236143Smm tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t), 5588236143Smm UMEM_NOFAIL); 5589168404Spjd 5590236143Smm if (ztest_opts.zo_verbose >= 4) 5591168404Spjd (void) printf("starting main threads...\n"); 5592168404Spjd 5593219089Spjd /* 5594219089Spjd * Kick off all the tests that run in parallel. 
5595219089Spjd */ 5596236143Smm for (int t = 0; t < ztest_opts.zo_threads; t++) { 5597236143Smm if (t < ztest_opts.zo_datasets && 5598236143Smm ztest_dataset_open(t) != 0) 5599219089Spjd return; 5600219089Spjd VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t, 5601219089Spjd THR_BOUND, &tid[t]) == 0); 5602219089Spjd } 5603168404Spjd 5604219089Spjd /* 5605219089Spjd * Wait for all of the tests to complete. We go in reverse order 5606219089Spjd * so we don't close datasets while threads are still using them. 5607219089Spjd */ 5608236143Smm for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) { 5609219089Spjd VERIFY(thr_join(tid[t], NULL, NULL) == 0); 5610236143Smm if (t < ztest_opts.zo_datasets) 5611236143Smm ztest_dataset_close(t); 5612219089Spjd } 5613185029Spjd 5614219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5615185029Spjd 5616219089Spjd zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 5617219089Spjd zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); 5618168404Spjd 5619236143Smm umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); 5620168404Spjd 5621219089Spjd /* Kill the resume thread */ 5622219089Spjd ztest_exiting = B_TRUE; 5623219089Spjd VERIFY(thr_join(resume_tid, NULL, NULL) == 0); 5624219089Spjd ztest_resume(spa); 5625219089Spjd 5626219089Spjd /* 5627219089Spjd * Right before closing the pool, kick off a bunch of async I/O; 5628219089Spjd * spa_close() should wait for it to complete. 5629219089Spjd */ 5630219089Spjd for (uint64_t object = 1; object < 50; object++) 5631219089Spjd dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); 5632219089Spjd 5633219089Spjd spa_close(spa, FTAG); 5634219089Spjd 5635219089Spjd /* 5636219089Spjd * Verify that we can loop over all pools. 
5637219089Spjd */ 5638219089Spjd mutex_enter(&spa_namespace_lock); 5639219089Spjd for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) 5640236143Smm if (ztest_opts.zo_verbose > 3) 5641219089Spjd (void) printf("spa_next: found %s\n", spa_name(spa)); 5642219089Spjd mutex_exit(&spa_namespace_lock); 5643219089Spjd 5644219089Spjd /* 5645219089Spjd * Verify that we can export the pool and reimport it under a 5646219089Spjd * different name. 5647219089Spjd */ 5648219089Spjd if (ztest_random(2) == 0) { 5649219089Spjd char name[MAXNAMELEN]; 5650236143Smm (void) snprintf(name, MAXNAMELEN, "%s_import", 5651236143Smm ztest_opts.zo_pool); 5652236143Smm ztest_spa_import_export(ztest_opts.zo_pool, name); 5653236143Smm ztest_spa_import_export(name, ztest_opts.zo_pool); 5654168404Spjd } 5655168404Spjd 5656219089Spjd kernel_fini(); 5657219089Spjd 5658219089Spjd list_destroy(&zcl.zcl_callbacks); 5659219089Spjd 5660219089Spjd (void) _mutex_destroy(&zcl.zcl_callbacks_lock); 5661219089Spjd 5662236143Smm (void) rwlock_destroy(&ztest_name_lock); 5663236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5664219089Spjd} 5665219089Spjd 5666219089Spjdstatic void 5667236143Smmztest_freeze(void) 5668219089Spjd{ 5669236143Smm ztest_ds_t *zd = &ztest_ds[0]; 5670219089Spjd spa_t *spa; 5671219089Spjd int numloops = 0; 5672219089Spjd 5673236143Smm if (ztest_opts.zo_verbose >= 3) 5674219089Spjd (void) printf("testing spa_freeze()...\n"); 5675168404Spjd 5676219089Spjd kernel_init(FREAD | FWRITE); 5677236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5678236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5679243524Smm spa->spa_debug = B_TRUE; 5680243524Smm ztest_spa = spa; 5681168404Spjd 5682168404Spjd /* 5683219089Spjd * Force the first log block to be transactionally allocated. 5684219089Spjd * We have to do this before we freeze the pool -- otherwise 5685219089Spjd * the log chain won't be anchored. 
5686168404Spjd */ 5687219089Spjd while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { 5688219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5689219089Spjd zil_commit(zd->zd_zilog, 0); 5690168404Spjd } 5691168404Spjd 5692168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5693168404Spjd 5694219089Spjd /* 5695219089Spjd * Freeze the pool. This stops spa_sync() from doing anything, 5696219089Spjd * so that the only way to record changes from now on is the ZIL. 5697219089Spjd */ 5698219089Spjd spa_freeze(spa); 5699185029Spjd 5700219089Spjd /* 5701219089Spjd * Run tests that generate log records but don't alter the pool config 5702219089Spjd * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). 5703219089Spjd * We do a txg_wait_synced() after each iteration to force the txg 5704219089Spjd * to increase well beyond the last synced value in the uberblock. 5705219089Spjd * The ZIL should be OK with that. 5706219089Spjd */ 5707236143Smm while (ztest_random(10) != 0 && 5708236143Smm numloops++ < ztest_opts.zo_maxloops) { 5709219089Spjd ztest_dmu_write_parallel(zd, 0); 5710219089Spjd ztest_dmu_object_alloc_free(zd, 0); 5711219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5712219089Spjd } 5713185029Spjd 5714168404Spjd /* 5715219089Spjd * Commit all of the changes we just generated. 5716168404Spjd */ 5717219089Spjd zil_commit(zd->zd_zilog, 0); 5718219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 5719168404Spjd 5720219089Spjd /* 5721219089Spjd * Close our dataset and close the pool. 5722219089Spjd */ 5723236143Smm ztest_dataset_close(0); 5724168404Spjd spa_close(spa, FTAG); 5725219089Spjd kernel_fini(); 5726168404Spjd 5727219089Spjd /* 5728219089Spjd * Open and close the pool and dataset to induce log replay. 
5729219089Spjd */ 5730219089Spjd kernel_init(FREAD | FWRITE); 5731236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5732239620Smm ASSERT(spa_freeze_txg(spa) == UINT64_MAX); 5733236143Smm VERIFY3U(0, ==, ztest_dataset_open(0)); 5734236143Smm ztest_dataset_close(0); 5735239620Smm 5736239620Smm spa->spa_debug = B_TRUE; 5737239620Smm ztest_spa = spa; 5738239620Smm txg_wait_synced(spa_get_dsl(spa), 0); 5739239620Smm ztest_reguid(NULL, 0); 5740239620Smm 5741219089Spjd spa_close(spa, FTAG); 5742168404Spjd kernel_fini(); 5743168404Spjd} 5744168404Spjd 5745168404Spjdvoid 5746168404Spjdprint_time(hrtime_t t, char *timebuf) 5747168404Spjd{ 5748168404Spjd hrtime_t s = t / NANOSEC; 5749168404Spjd hrtime_t m = s / 60; 5750168404Spjd hrtime_t h = m / 60; 5751168404Spjd hrtime_t d = h / 24; 5752168404Spjd 5753168404Spjd s -= m * 60; 5754168404Spjd m -= h * 60; 5755168404Spjd h -= d * 24; 5756168404Spjd 5757168404Spjd timebuf[0] = '\0'; 5758168404Spjd 5759168404Spjd if (d) 5760168404Spjd (void) sprintf(timebuf, 5761168404Spjd "%llud%02lluh%02llum%02llus", d, h, m, s); 5762168404Spjd else if (h) 5763168404Spjd (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); 5764168404Spjd else if (m) 5765168404Spjd (void) sprintf(timebuf, "%llum%02llus", m, s); 5766168404Spjd else 5767168404Spjd (void) sprintf(timebuf, "%llus", s); 5768168404Spjd} 5769168404Spjd 5770219089Spjdstatic nvlist_t * 5771219089Spjdmake_random_props() 5772219089Spjd{ 5773219089Spjd nvlist_t *props; 5774219089Spjd 5775236884Smm VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); 5776219089Spjd if (ztest_random(2) == 0) 5777236884Smm return (props); 5778219089Spjd VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); 5779219089Spjd 5780219089Spjd return (props); 5781219089Spjd} 5782219089Spjd 5783168404Spjd/* 5784168404Spjd * Create a storage pool with the given name and initial vdev size. 5785219089Spjd * Then test spa_freeze() functionality. 
5786168404Spjd */ 5787168404Spjdstatic void 5788219089Spjdztest_init(ztest_shared_t *zs) 5789168404Spjd{ 5790168404Spjd spa_t *spa; 5791219089Spjd nvlist_t *nvroot, *props; 5792168404Spjd 5793236143Smm VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0); 5794236143Smm VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0); 5795219089Spjd 5796168404Spjd kernel_init(FREAD | FWRITE); 5797168404Spjd 5798168404Spjd /* 5799168404Spjd * Create the storage pool. 5800168404Spjd */ 5801236143Smm (void) spa_destroy(ztest_opts.zo_pool); 5802219089Spjd ztest_shared->zs_vdev_next_leaf = 0; 5803219089Spjd zs->zs_splits = 0; 5804236143Smm zs->zs_mirrors = ztest_opts.zo_mirrors; 5805243505Smm nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 5806236143Smm 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); 5807219089Spjd props = make_random_props(); 5808236884Smm for (int i = 0; i < SPA_FEATURES; i++) { 5809236884Smm char buf[1024]; 5810236884Smm (void) snprintf(buf, sizeof (buf), "feature@%s", 5811236884Smm spa_feature_table[i].fi_uname); 5812236884Smm VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); 5813236884Smm } 5814248571Smm VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); 5815168404Spjd nvlist_free(nvroot); 5816168404Spjd 5817236143Smm VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); 5818236143Smm zs->zs_metaslab_sz = 5819236143Smm 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; 5820236884Smm 5821219089Spjd spa_close(spa, FTAG); 5822209962Smm 5823219089Spjd kernel_fini(); 5824168404Spjd 5825236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5826168404Spjd 5827236143Smm ztest_freeze(); 5828219089Spjd 5829236143Smm ztest_run_zdb(ztest_opts.zo_pool); 5830219089Spjd 5831236143Smm (void) rwlock_destroy(&ztest_name_lock); 5832236143Smm (void) _mutex_destroy(&ztest_vdev_lock); 5833168404Spjd} 5834168404Spjd 5835236143Smmstatic void 5836242845Sdelphijsetup_data_fd(void) 5837236143Smm{ 5838242845Sdelphij static char 
ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; 5839236143Smm 5840242845Sdelphij ztest_fd_data = mkstemp(ztest_name_data); 5841242845Sdelphij ASSERT3S(ztest_fd_data, >=, 0); 5842242845Sdelphij (void) unlink(ztest_name_data); 5843242845Sdelphij} 5844236143Smm 5845236143Smm 5846236884Smmstatic int 5847236884Smmshared_data_size(ztest_shared_hdr_t *hdr) 5848236884Smm{ 5849236884Smm int size; 5850236884Smm 5851236884Smm size = hdr->zh_hdr_size; 5852236884Smm size += hdr->zh_opts_size; 5853236884Smm size += hdr->zh_size; 5854236884Smm size += hdr->zh_stats_size * hdr->zh_stats_count; 5855236884Smm size += hdr->zh_ds_size * hdr->zh_ds_count; 5856236884Smm 5857236884Smm return (size); 5858236884Smm} 5859236884Smm 5860236143Smmstatic void 5861236143Smmsetup_hdr(void) 5862236143Smm{ 5863236884Smm int size; 5864236143Smm ztest_shared_hdr_t *hdr; 5865236143Smm 5866236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5867242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5868236143Smm ASSERT(hdr != MAP_FAILED); 5869236143Smm 5870242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); 5871236884Smm 5872236143Smm hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); 5873236143Smm hdr->zh_opts_size = sizeof (ztest_shared_opts_t); 5874236143Smm hdr->zh_size = sizeof (ztest_shared_t); 5875236143Smm hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); 5876236143Smm hdr->zh_stats_count = ZTEST_FUNCS; 5877236143Smm hdr->zh_ds_size = sizeof (ztest_shared_ds_t); 5878236143Smm hdr->zh_ds_count = ztest_opts.zo_datasets; 5879236143Smm 5880236884Smm size = shared_data_size(hdr); 5881242845Sdelphij VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); 5882236884Smm 5883236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5884236143Smm} 5885236143Smm 5886236143Smmstatic void 5887236143Smmsetup_data(void) 5888236143Smm{ 5889236143Smm int size, offset; 5890236143Smm ztest_shared_hdr_t *hdr; 5891236143Smm 
uint8_t *buf; 5892236143Smm 5893236143Smm hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), 5894242845Sdelphij PROT_READ, MAP_SHARED, ztest_fd_data, 0); 5895236143Smm ASSERT(hdr != MAP_FAILED); 5896236143Smm 5897236884Smm size = shared_data_size(hdr); 5898236143Smm 5899236143Smm (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); 5900236143Smm hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), 5901242845Sdelphij PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); 5902236143Smm ASSERT(hdr != MAP_FAILED); 5903236143Smm buf = (uint8_t *)hdr; 5904236143Smm 5905236143Smm offset = hdr->zh_hdr_size; 5906236143Smm ztest_shared_opts = (void *)&buf[offset]; 5907236143Smm offset += hdr->zh_opts_size; 5908236143Smm ztest_shared = (void *)&buf[offset]; 5909236143Smm offset += hdr->zh_size; 5910236143Smm ztest_shared_callstate = (void *)&buf[offset]; 5911236143Smm offset += hdr->zh_stats_size * hdr->zh_stats_count; 5912236143Smm ztest_shared_ds = (void *)&buf[offset]; 5913236143Smm} 5914236143Smm 5915236143Smmstatic boolean_t 5916236143Smmexec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) 5917236143Smm{ 5918236143Smm pid_t pid; 5919236143Smm int status; 5920242845Sdelphij char *cmdbuf = NULL; 5921236143Smm 5922236143Smm pid = fork(); 5923236143Smm 5924236143Smm if (cmd == NULL) { 5925242845Sdelphij cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); 5926242845Sdelphij (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); 5927236143Smm cmd = cmdbuf; 5928236143Smm } 5929236143Smm 5930236143Smm if (pid == -1) 5931236143Smm fatal(1, "fork failed"); 5932236143Smm 5933236143Smm if (pid == 0) { /* child */ 5934236143Smm char *emptyargv[2] = { cmd, NULL }; 5935242845Sdelphij char fd_data_str[12]; 5936236143Smm 5937236143Smm struct rlimit rl = { 1024, 1024 }; 5938236143Smm (void) setrlimit(RLIMIT_NOFILE, &rl); 5939242845Sdelphij 5940242845Sdelphij (void) close(ztest_fd_rand); 5941242845Sdelphij VERIFY3U(11, >=, 
5942242845Sdelphij snprintf(fd_data_str, 12, "%d", ztest_fd_data)); 5943242845Sdelphij VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); 5944242845Sdelphij 5945236143Smm (void) enable_extended_FILE_stdio(-1, -1); 5946236143Smm if (libpath != NULL) 5947236143Smm VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); 5948236143Smm#ifdef illumos 5949236143Smm (void) execv(cmd, emptyargv); 5950236143Smm#else 5951236143Smm (void) execvp(cmd, emptyargv); 5952236143Smm#endif 5953236143Smm ztest_dump_core = B_FALSE; 5954236143Smm fatal(B_TRUE, "exec failed: %s", cmd); 5955236143Smm } 5956236143Smm 5957242845Sdelphij if (cmdbuf != NULL) { 5958242845Sdelphij umem_free(cmdbuf, MAXPATHLEN); 5959242845Sdelphij cmd = NULL; 5960242845Sdelphij } 5961242845Sdelphij 5962236143Smm while (waitpid(pid, &status, 0) != pid) 5963236143Smm continue; 5964236143Smm if (statusp != NULL) 5965236143Smm *statusp = status; 5966236143Smm 5967236143Smm if (WIFEXITED(status)) { 5968236143Smm if (WEXITSTATUS(status) != 0) { 5969236143Smm (void) fprintf(stderr, "child exited with code %d\n", 5970236143Smm WEXITSTATUS(status)); 5971236143Smm exit(2); 5972236143Smm } 5973236143Smm return (B_FALSE); 5974236143Smm } else if (WIFSIGNALED(status)) { 5975236143Smm if (!ignorekill || WTERMSIG(status) != SIGKILL) { 5976236143Smm (void) fprintf(stderr, "child died with signal %d\n", 5977236143Smm WTERMSIG(status)); 5978236143Smm exit(3); 5979236143Smm } 5980236143Smm return (B_TRUE); 5981236143Smm } else { 5982236143Smm (void) fprintf(stderr, "something strange happened to child\n"); 5983236143Smm exit(4); 5984236143Smm /* NOTREACHED */ 5985236143Smm } 5986236143Smm} 5987236143Smm 5988236143Smmstatic void 5989236143Smmztest_run_init(void) 5990236143Smm{ 5991236143Smm ztest_shared_t *zs = ztest_shared; 5992236143Smm 5993236143Smm ASSERT(ztest_opts.zo_init != 0); 5994236143Smm 5995236143Smm /* 5996236143Smm * Blow away any existing copy of zpool.cache 5997236143Smm */ 5998236143Smm (void) remove(spa_config_path); 
5999236143Smm 6000236143Smm /* 6001236143Smm * Create and initialize our storage pool. 6002236143Smm */ 6003236143Smm for (int i = 1; i <= ztest_opts.zo_init; i++) { 6004236143Smm bzero(zs, sizeof (ztest_shared_t)); 6005236143Smm if (ztest_opts.zo_verbose >= 3 && 6006236143Smm ztest_opts.zo_init != 1) { 6007236143Smm (void) printf("ztest_init(), pass %d\n", i); 6008236143Smm } 6009236143Smm ztest_init(zs); 6010236143Smm } 6011236143Smm} 6012236143Smm 6013168404Spjdint 6014168404Spjdmain(int argc, char **argv) 6015168404Spjd{ 6016168404Spjd int kills = 0; 6017168404Spjd int iters = 0; 6018236143Smm int older = 0; 6019236143Smm int newer = 0; 6020168404Spjd ztest_shared_t *zs; 6021168404Spjd ztest_info_t *zi; 6022236143Smm ztest_shared_callstate_t *zc; 6023168404Spjd char timebuf[100]; 6024168404Spjd char numbuf[6]; 6025219089Spjd spa_t *spa; 6026242845Sdelphij char *cmd; 6027236143Smm boolean_t hasalt; 6028242845Sdelphij char *fd_data_str = getenv("ZTEST_FD_DATA"); 6029168404Spjd 6030168404Spjd (void) setvbuf(stdout, NULL, _IOLBF, 0); 6031168404Spjd 6032240133Smm dprintf_setup(&argc, argv); 6033240133Smm 6034242845Sdelphij ztest_fd_rand = open("/dev/urandom", O_RDONLY); 6035242845Sdelphij ASSERT3S(ztest_fd_rand, >=, 0); 6036242845Sdelphij 6037242845Sdelphij if (!fd_data_str) { 6038236143Smm process_options(argc, argv); 6039168404Spjd 6040242845Sdelphij setup_data_fd(); 6041236143Smm setup_hdr(); 6042236143Smm setup_data(); 6043236143Smm bcopy(&ztest_opts, ztest_shared_opts, 6044236143Smm sizeof (*ztest_shared_opts)); 6045236143Smm } else { 6046242845Sdelphij ztest_fd_data = atoi(fd_data_str); 6047236143Smm setup_data(); 6048236143Smm bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); 6049236143Smm } 6050236143Smm ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); 6051168404Spjd 6052219089Spjd /* Override location of zpool.cache */ 6053242845Sdelphij VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache", 6054242845Sdelphij 
ztest_opts.zo_dir), !=, -1); 6055219089Spjd 6056236143Smm ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), 6057236143Smm UMEM_NOFAIL); 6058236143Smm zs = ztest_shared; 6059168404Spjd 6060242845Sdelphij if (fd_data_str) { 6061236143Smm metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; 6062236143Smm metaslab_df_alloc_threshold = 6063236143Smm zs->zs_metaslab_df_alloc_threshold; 6064219089Spjd 6065236143Smm if (zs->zs_do_init) 6066236143Smm ztest_run_init(); 6067236143Smm else 6068236143Smm ztest_run(zs); 6069236143Smm exit(0); 6070236143Smm } 6071168404Spjd 6072236143Smm hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); 6073236143Smm 6074236143Smm if (ztest_opts.zo_verbose >= 1) { 6075168404Spjd (void) printf("%llu vdevs, %d datasets, %d threads," 6076168404Spjd " %llu seconds...\n", 6077236143Smm (u_longlong_t)ztest_opts.zo_vdevs, 6078236143Smm ztest_opts.zo_datasets, 6079236143Smm ztest_opts.zo_threads, 6080236143Smm (u_longlong_t)ztest_opts.zo_time); 6081168404Spjd } 6082168404Spjd 6083242845Sdelphij cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); 6084242845Sdelphij (void) strlcpy(cmd, getexecname(), MAXNAMELEN); 6085236143Smm 6086236143Smm zs->zs_do_init = B_TRUE; 6087236143Smm if (strlen(ztest_opts.zo_alt_ztest) != 0) { 6088236143Smm if (ztest_opts.zo_verbose >= 1) { 6089236143Smm (void) printf("Executing older ztest for " 6090236143Smm "initialization: %s\n", ztest_opts.zo_alt_ztest); 6091236143Smm } 6092236143Smm VERIFY(!exec_child(ztest_opts.zo_alt_ztest, 6093236143Smm ztest_opts.zo_alt_libpath, B_FALSE, NULL)); 6094236143Smm } else { 6095236143Smm VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); 6096168404Spjd } 6097236143Smm zs->zs_do_init = B_FALSE; 6098168404Spjd 6099219089Spjd zs->zs_proc_start = gethrtime(); 6100236143Smm zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; 6101219089Spjd 6102219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6103236143Smm zi = &ztest_info[f]; 6104236143Smm zc = 
ZTEST_GET_SHARED_CALLSTATE(f); 6105219089Spjd if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) 6106236143Smm zc->zc_next = UINT64_MAX; 6107168404Spjd else 6108236143Smm zc->zc_next = zs->zs_proc_start + 6109219089Spjd ztest_random(2 * zi->zi_interval[0] + 1); 6110168404Spjd } 6111168404Spjd 6112168404Spjd /* 6113168404Spjd * Run the tests in a loop. These tests include fault injection 6114168404Spjd * to verify that self-healing data works, and forced crashes 6115168404Spjd * to verify that we never lose on-disk consistency. 6116168404Spjd */ 6117219089Spjd while (gethrtime() < zs->zs_proc_stop) { 6118168404Spjd int status; 6119236143Smm boolean_t killed; 6120168404Spjd 6121168404Spjd /* 6122168404Spjd * Initialize the workload counters for each function. 6123168404Spjd */ 6124219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6125236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6126236143Smm zc->zc_count = 0; 6127236143Smm zc->zc_time = 0; 6128168404Spjd } 6129168404Spjd 6130209962Smm /* Set the allocation switch size */ 6131236143Smm zs->zs_metaslab_df_alloc_threshold = 6132236143Smm ztest_random(zs->zs_metaslab_sz / 4) + 1; 6133209962Smm 6134236143Smm if (!hasalt || ztest_random(2) == 0) { 6135236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6136236143Smm (void) printf("Executing newer ztest: %s\n", 6137236143Smm cmd); 6138168404Spjd } 6139236143Smm newer++; 6140236143Smm killed = exec_child(cmd, NULL, B_TRUE, &status); 6141236143Smm } else { 6142236143Smm if (hasalt && ztest_opts.zo_verbose >= 1) { 6143236143Smm (void) printf("Executing older ztest: %s\n", 6144236143Smm ztest_opts.zo_alt_ztest); 6145168404Spjd } 6146236143Smm older++; 6147236143Smm killed = exec_child(ztest_opts.zo_alt_ztest, 6148236143Smm ztest_opts.zo_alt_libpath, B_TRUE, &status); 6149168404Spjd } 6150168404Spjd 6151236143Smm if (killed) 6152236143Smm kills++; 6153168404Spjd iters++; 6154168404Spjd 6155236143Smm if (ztest_opts.zo_verbose >= 1) { 6156168404Spjd hrtime_t now = 
gethrtime(); 6157168404Spjd 6158219089Spjd now = MIN(now, zs->zs_proc_stop); 6159219089Spjd print_time(zs->zs_proc_stop - now, timebuf); 6160168404Spjd nicenum(zs->zs_space, numbuf); 6161168404Spjd 6162168404Spjd (void) printf("Pass %3d, %8s, %3llu ENOSPC, " 6163168404Spjd "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", 6164168404Spjd iters, 6165168404Spjd WIFEXITED(status) ? "Complete" : "SIGKILL", 6166168404Spjd (u_longlong_t)zs->zs_enospc_count, 6167168404Spjd 100.0 * zs->zs_alloc / zs->zs_space, 6168168404Spjd numbuf, 6169219089Spjd 100.0 * (now - zs->zs_proc_start) / 6170236143Smm (ztest_opts.zo_time * NANOSEC), timebuf); 6171168404Spjd } 6172168404Spjd 6173236143Smm if (ztest_opts.zo_verbose >= 2) { 6174168404Spjd (void) printf("\nWorkload summary:\n\n"); 6175168404Spjd (void) printf("%7s %9s %s\n", 6176168404Spjd "Calls", "Time", "Function"); 6177168404Spjd (void) printf("%7s %9s %s\n", 6178168404Spjd "-----", "----", "--------"); 6179219089Spjd for (int f = 0; f < ZTEST_FUNCS; f++) { 6180168404Spjd Dl_info dli; 6181168404Spjd 6182236143Smm zi = &ztest_info[f]; 6183236143Smm zc = ZTEST_GET_SHARED_CALLSTATE(f); 6184236143Smm print_time(zc->zc_time, timebuf); 6185168404Spjd (void) dladdr((void *)zi->zi_func, &dli); 6186168404Spjd (void) printf("%7llu %9s %s\n", 6187236143Smm (u_longlong_t)zc->zc_count, timebuf, 6188168404Spjd dli.dli_sname); 6189168404Spjd } 6190168404Spjd (void) printf("\n"); 6191168404Spjd } 6192168404Spjd 6193168404Spjd /* 6194219089Spjd * It's possible that we killed a child during a rename test, 6195219089Spjd * in which case we'll have a 'ztest_tmp' pool lying around 6196219089Spjd * instead of 'ztest'. Do a blind rename in case this happened. 
6197168404Spjd */ 6198219089Spjd kernel_init(FREAD); 6199236143Smm if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { 6200219089Spjd spa_close(spa, FTAG); 6201219089Spjd } else { 6202219089Spjd char tmpname[MAXNAMELEN]; 6203219089Spjd kernel_fini(); 6204219089Spjd kernel_init(FREAD | FWRITE); 6205219089Spjd (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", 6206236143Smm ztest_opts.zo_pool); 6207236143Smm (void) spa_rename(tmpname, ztest_opts.zo_pool); 6208219089Spjd } 6209168404Spjd kernel_fini(); 6210219089Spjd 6211236143Smm ztest_run_zdb(ztest_opts.zo_pool); 6212168404Spjd } 6213168404Spjd 6214236143Smm if (ztest_opts.zo_verbose >= 1) { 6215236143Smm if (hasalt) { 6216236143Smm (void) printf("%d runs of older ztest: %s\n", older, 6217236143Smm ztest_opts.zo_alt_ztest); 6218236143Smm (void) printf("%d runs of newer ztest: %s\n", newer, 6219236143Smm cmd); 6220236143Smm } 6221168404Spjd (void) printf("%d killed, %d completed, %.0f%% kill rate\n", 6222168404Spjd kills, iters - kills, (100.0 * kills) / MAX(1, iters)); 6223168404Spjd } 6224168404Spjd 6225242845Sdelphij umem_free(cmd, MAXNAMELEN); 6226242845Sdelphij 6227168404Spjd return (0); 6228168404Spjd} 6229