1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd 22168404Spjd/* 23219089Spjd * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24307108Smav * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 25296519Smav * Copyright (c) 2014 Integros [integros.com] 26331397Smav * Copyright 2017 Joyent, Inc. 27168404Spjd */ 28168404Spjd 29185029Spjd#include <sys/spa.h> 30168404Spjd#include <sys/spa_impl.h> 31168404Spjd#include <sys/zap.h> 32168404Spjd#include <sys/dsl_synctask.h> 33185029Spjd#include <sys/dmu_tx.h> 34185029Spjd#include <sys/dmu_objset.h> 35248571Smm#include <sys/dsl_dataset.h> 36248571Smm#include <sys/dsl_dir.h> 37185029Spjd#include <sys/utsname.h> 38185029Spjd#include <sys/sunddi.h> 39248571Smm#include <sys/cred.h> 40219089Spjd#include "zfs_comutil.h" 41185029Spjd#ifdef _KERNEL 42185029Spjd#include <sys/cmn_err.h> 43185029Spjd#include <sys/zone.h> 44185029Spjd#endif 45168404Spjd 46168404Spjd/* 47168404Spjd * Routines to manage the on-disk history log. 48168404Spjd * 49168404Spjd * The history log is stored as a dmu object containing 50168404Spjd * <packed record length, record nvlist> tuples. 51168404Spjd * 52168404Spjd * Where "record nvlist" is a nvlist containing uint64_ts and strings, and 53168404Spjd * "packed record length" is the packed length of the "record nvlist" stored 54168404Spjd * as a little endian uint64_t. 55168404Spjd * 56168404Spjd * The log is implemented as a ring buffer, though the original creation 57168404Spjd * of the pool ('zpool create') is never overwritten. 58168404Spjd * 59168404Spjd * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer 60168404Spjd * of 'spa_history' stores the offsets for logging/retrieving history as 61168404Spjd * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of 62168404Spjd * where the 'zpool create' record is stored. This allows us to never 63168404Spjd * overwrite the original creation of the pool. 'sh_phys_max_off' is the 64168404Spjd * physical ending offset in bytes of the log. This tells you the length of 65168404Spjd * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record 66168404Spjd * is added, 'sh_eof' is incremented by the the size of the record. 67168404Spjd * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes). 68168404Spjd * This is where the consumer should start reading from after reading in 69168404Spjd * the 'zpool create' portion of the log. 70168404Spjd * 71168404Spjd * 'sh_records_lost' keeps track of how many records have been overwritten 72168404Spjd * and permanently lost. 73168404Spjd */ 74168404Spjd 75168404Spjd/* convert a logical offset to physical */ 76168404Spjdstatic uint64_t 77168404Spjdspa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp) 78168404Spjd{ 79168404Spjd uint64_t phys_len; 80168404Spjd 81168404Spjd phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len; 82168404Spjd return ((log_off - shpp->sh_pool_create_len) % phys_len 83168404Spjd + shpp->sh_pool_create_len); 84168404Spjd} 85168404Spjd 86168404Spjdvoid 87168404Spjdspa_history_create_obj(spa_t *spa, dmu_tx_t *tx) 88168404Spjd{ 89168404Spjd dmu_buf_t *dbp; 90168404Spjd spa_history_phys_t *shpp; 91168404Spjd objset_t *mos = spa->spa_meta_objset; 92168404Spjd 93168404Spjd ASSERT(spa->spa_history == 0); 94168404Spjd spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, 95274337Sdelphij SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, 96168404Spjd sizeof (spa_history_phys_t), tx); 97168404Spjd 98168404Spjd VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, 99168404Spjd DMU_POOL_HISTORY, sizeof (uint64_t), 1, 100168404Spjd &spa->spa_history, tx) == 0); 101168404Spjd 102168404Spjd VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); 103168404Spjd ASSERT(dbp->db_size >= sizeof (spa_history_phys_t)); 104168404Spjd 105168404Spjd shpp = dbp->db_data; 106168404Spjd dmu_buf_will_dirty(dbp, tx); 107168404Spjd 108168404Spjd /* 109168404Spjd * Figure out maximum size of history log. We set it at 110228103Smm * 0.1% of pool size, with a max of 1G and min of 128KB. 111168404Spjd */ 112219089Spjd shpp->sh_phys_max_off = 113228103Smm metaslab_class_get_dspace(spa_normal_class(spa)) / 1000; 114228103Smm shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30); 115168404Spjd shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10); 116168404Spjd 117168404Spjd dmu_buf_rele(dbp, FTAG); 118168404Spjd} 119168404Spjd 120168404Spjd/* 121168404Spjd * Change 'sh_bof' to the beginning of the next record. 122168404Spjd */ 123168404Spjdstatic int 124168404Spjdspa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp) 125168404Spjd{ 126168404Spjd objset_t *mos = spa->spa_meta_objset; 127168404Spjd uint64_t firstread, reclen, phys_bof; 128168404Spjd char buf[sizeof (reclen)]; 129168404Spjd int err; 130168404Spjd 131168404Spjd phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp); 132168404Spjd firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof); 133168404Spjd 134168404Spjd if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread, 135209962Smm buf, DMU_READ_PREFETCH)) != 0) 136168404Spjd return (err); 137168404Spjd if (firstread != sizeof (reclen)) { 138168404Spjd if ((err = dmu_read(mos, spa->spa_history, 139168404Spjd shpp->sh_pool_create_len, sizeof (reclen) - firstread, 140209962Smm buf + firstread, DMU_READ_PREFETCH)) != 0) 141168404Spjd return (err); 142168404Spjd } 143168404Spjd 144168404Spjd reclen = LE_64(*((uint64_t *)buf)); 145168404Spjd shpp->sh_bof += reclen + sizeof (reclen); 146168404Spjd shpp->sh_records_lost++; 147168404Spjd return (0); 148168404Spjd} 149168404Spjd 150168404Spjdstatic int 151168404Spjdspa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, 152168404Spjd dmu_tx_t *tx) 153168404Spjd{ 154168404Spjd uint64_t firstwrite, phys_eof; 155168404Spjd objset_t *mos = spa->spa_meta_objset; 156168404Spjd int err; 157168404Spjd 158168404Spjd ASSERT(MUTEX_HELD(&spa->spa_history_lock)); 159168404Spjd 160168404Spjd /* see if we need to reset logical BOF */ 161168404Spjd while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - 162168404Spjd (shpp->sh_eof - shpp->sh_bof) <= len) { 163185029Spjd if ((err = spa_history_advance_bof(spa, shpp)) != 0) { 164168404Spjd return (err); 165185029Spjd } 166168404Spjd } 167168404Spjd 168168404Spjd phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); 169168404Spjd firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); 170168404Spjd shpp->sh_eof += len; 171168404Spjd dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); 172168404Spjd 173168404Spjd len -= firstwrite; 174168404Spjd if (len > 0) { 175168404Spjd /* write out the rest at the beginning of physical file */ 176168404Spjd dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, 177168404Spjd len, (char *)buf + firstwrite, tx); 178168404Spjd } 179168404Spjd 180168404Spjd return (0); 181168404Spjd} 182168404Spjd 183185029Spjdstatic char * 184248571Smmspa_history_zone(void) 185185029Spjd{ 186185029Spjd#ifdef _KERNEL 187194118Sjamie /* XXX: pr_hostname can be changed by default from within a jail! */ 188185029Spjd if (jailed(curthread->td_ucred)) 189194118Sjamie return (curthread->td_ucred->cr_prison->pr_hostname); 190185029Spjd#endif 191248571Smm return (NULL); 192185029Spjd} 193185029Spjd 194168404Spjd/* 195331397Smav * Post a history sysevent. 196331397Smav * 197331397Smav * The nvlist_t* passed into this function will be transformed into a new 198331397Smav * nvlist where: 199331397Smav * 200331397Smav * 1. Nested nvlists will be flattened to a single level 201331397Smav * 2. Keys will have their names normalized (to remove any problematic 202331397Smav * characters, such as whitespace) 203331397Smav * 204331397Smav * The nvlist_t passed into this function will duplicated and should be freed 205331397Smav * by caller. 206331397Smav * 207331397Smav */ 208331397Smavstatic void 209331397Smavspa_history_log_notify(spa_t *spa, nvlist_t *nvl) 210331397Smav{ 211331397Smav nvlist_t *hist_nvl = fnvlist_alloc(); 212331397Smav uint64_t uint64; 213331397Smav char *string; 214331397Smav 215331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0) 216331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string); 217331397Smav 218331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) 219331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); 220331397Smav 221331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0) 222331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string); 223331397Smav 224331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0) 225331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string); 226331397Smav 227331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0) 228331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string); 229331397Smav 230331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0) 231331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string); 232331397Smav 233331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0) 234331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string); 235331397Smav 236331397Smav if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0) 237331397Smav fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string); 238331397Smav 239331397Smav if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0) 240331397Smav fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64); 241331397Smav 242331397Smav if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0) 243331397Smav fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64); 244331397Smav 245331397Smav if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0) 246331397Smav fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64); 247331397Smav 248331397Smav if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0) 249331397Smav fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64); 250331397Smav 251331397Smav if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0) 252331397Smav fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64); 253331397Smav 254331397Smav spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT); 255331397Smav 256331397Smav nvlist_free(hist_nvl); 257331397Smav} 258331397Smav 259331397Smav/* 260168404Spjd * Write out a history event. 261168404Spjd */ 262219089Spjd/*ARGSUSED*/ 263185029Spjdstatic void 264248571Smmspa_history_log_sync(void *arg, dmu_tx_t *tx) 265168404Spjd{ 266248571Smm nvlist_t *nvl = arg; 267248571Smm spa_t *spa = dmu_tx_pool(tx)->dp_spa; 268168404Spjd objset_t *mos = spa->spa_meta_objset; 269168404Spjd dmu_buf_t *dbp; 270168404Spjd spa_history_phys_t *shpp; 271168404Spjd size_t reclen; 272168404Spjd uint64_t le_len; 273168404Spjd char *record_packed = NULL; 274168404Spjd int ret; 275168404Spjd 276168404Spjd /* 277168404Spjd * If we have an older pool that doesn't have a command 278168404Spjd * history object, create it now. 279168404Spjd */ 280168404Spjd mutex_enter(&spa->spa_history_lock); 281168404Spjd if (!spa->spa_history) 282168404Spjd spa_history_create_obj(spa, tx); 283168404Spjd mutex_exit(&spa->spa_history_lock); 284168404Spjd 285168404Spjd /* 286168404Spjd * Get the offset of where we need to write via the bonus buffer. 287168404Spjd * Update the offset when the write completes. 288168404Spjd */ 289248571Smm VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)); 290168404Spjd shpp = dbp->db_data; 291168404Spjd 292168404Spjd dmu_buf_will_dirty(dbp, tx); 293168404Spjd 294168404Spjd#ifdef ZFS_DEBUG 295168404Spjd { 296168404Spjd dmu_object_info_t doi; 297168404Spjd dmu_object_info_from_db(dbp, &doi); 298168404Spjd ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); 299168404Spjd } 300168404Spjd#endif 301168404Spjd 302248571Smm fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec()); 303185029Spjd#ifdef _KERNEL 304248571Smm fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename); 305185029Spjd#endif 306248571Smm if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) { 307248571Smm zfs_dbgmsg("command: %s", 308248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD)); 309248571Smm } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) { 310248571Smm if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) { 311248571Smm zfs_dbgmsg("txg %lld %s %s (id %llu) %s", 312248571Smm fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), 313248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), 314248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME), 315248571Smm fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID), 316248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); 317248571Smm } else { 318248571Smm zfs_dbgmsg("txg %lld %s %s", 319248571Smm fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG), 320248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME), 321248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR)); 322248571Smm } 323331397Smav /* 324331397Smav * The history sysevent is posted only for internal history 325331397Smav * messages to show what has happened, not how it happened. For 326331397Smav * example, the following command: 327331397Smav * 328331397Smav * # zfs destroy -r tank/foo 329331397Smav * 330331397Smav * will result in one sysevent posted per dataset that is 331331397Smav * destroyed as a result of the command - which could be more 332331397Smav * than one event in total. By contrast, if the sysevent was 333331397Smav * posted as a result of the ZPOOL_HIST_CMD key being present 334331397Smav * it would result in only one sysevent being posted with the 335331397Smav * full command line arguments, requiring the consumer to know 336331397Smav * how to parse and understand zfs(1M) command invocations. 337331397Smav */ 338331397Smav spa_history_log_notify(spa, nvl); 339248571Smm } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) { 340248571Smm zfs_dbgmsg("ioctl %s", 341248571Smm fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL)); 342185029Spjd } 343185029Spjd 344248571Smm record_packed = fnvlist_pack(nvl, &reclen); 345185029Spjd 346168404Spjd mutex_enter(&spa->spa_history_lock); 347168404Spjd 348168404Spjd /* write out the packed length as little endian */ 349168404Spjd le_len = LE_64((uint64_t)reclen); 350168404Spjd ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx); 351168404Spjd if (!ret) 352168404Spjd ret = spa_history_write(spa, record_packed, reclen, shpp, tx); 353168404Spjd 354248571Smm /* The first command is the create, which we keep forever */ 355248571Smm if (ret == 0 && shpp->sh_pool_create_len == 0 && 356248571Smm nvlist_exists(nvl, ZPOOL_HIST_CMD)) { 357248571Smm shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof; 358168404Spjd } 359168404Spjd 360168404Spjd mutex_exit(&spa->spa_history_lock); 361248571Smm fnvlist_pack_free(record_packed, reclen); 362168404Spjd dmu_buf_rele(dbp, FTAG); 363248571Smm fnvlist_free(nvl); 364168404Spjd} 365168404Spjd 366168404Spjd/* 367168404Spjd * Write out a history event. 368168404Spjd */ 369168404Spjdint 370248571Smmspa_history_log(spa_t *spa, const char *msg) 371168404Spjd{ 372248571Smm int err; 373248571Smm nvlist_t *nvl = fnvlist_alloc(); 374248571Smm 375248571Smm fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); 376248571Smm err = spa_history_log_nvl(spa, nvl); 377248571Smm fnvlist_free(nvl); 378248571Smm return (err); 379248571Smm} 380248571Smm 381248571Smmint 382248571Smmspa_history_log_nvl(spa_t *spa, nvlist_t *nvl) 383248571Smm{ 384219089Spjd int err = 0; 385219089Spjd dmu_tx_t *tx; 386248571Smm nvlist_t *nvarg; 387168404Spjd 388248571Smm if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) 389248571Smm return (EINVAL); 390185029Spjd 391240133Smm if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) 392249195Smm return (SET_ERROR(EINVAL)); 393240133Smm 394219089Spjd tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 395219089Spjd err = dmu_tx_assign(tx, TXG_WAIT); 396219089Spjd if (err) { 397219089Spjd dmu_tx_abort(tx); 398219089Spjd return (err); 399219089Spjd } 400219089Spjd 401248571Smm nvarg = fnvlist_dup(nvl); 402248571Smm if (spa_history_zone() != NULL) { 403248571Smm fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, 404248571Smm spa_history_zone()); 405248571Smm } 406248571Smm fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); 407219089Spjd 408219089Spjd /* Kick this off asynchronously; errors are ignored. */ 409248571Smm dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, 410268473Sdelphij nvarg, 0, ZFS_SPACE_CHECK_NONE, tx); 411219089Spjd dmu_tx_commit(tx); 412219089Spjd 413248571Smm /* spa_history_log_sync will free nvl */ 414219089Spjd return (err); 415248571Smm 416168404Spjd} 417168404Spjd 418168404Spjd/* 419168404Spjd * Read out the command history. 420168404Spjd */ 421168404Spjdint 422168404Spjdspa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf) 423168404Spjd{ 424168404Spjd objset_t *mos = spa->spa_meta_objset; 425168404Spjd dmu_buf_t *dbp; 426168404Spjd uint64_t read_len, phys_read_off, phys_eof; 427168404Spjd uint64_t leftover = 0; 428168404Spjd spa_history_phys_t *shpp; 429168404Spjd int err; 430168404Spjd 431168404Spjd /* 432248571Smm * If the command history doesn't exist (older pool), 433168404Spjd * that's ok, just return ENOENT. 434168404Spjd */ 435168404Spjd if (!spa->spa_history) 436249195Smm return (SET_ERROR(ENOENT)); 437168404Spjd 438219089Spjd /* 439219089Spjd * The history is logged asynchronously, so when they request 440219089Spjd * the first chunk of history, make sure everything has been 441219089Spjd * synced to disk so that we get it. 442219089Spjd */ 443219089Spjd if (*offp == 0 && spa_writeable(spa)) 444219089Spjd txg_wait_synced(spa_get_dsl(spa), 0); 445219089Spjd 446168404Spjd if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0) 447168404Spjd return (err); 448168404Spjd shpp = dbp->db_data; 449168404Spjd 450168404Spjd#ifdef ZFS_DEBUG 451168404Spjd { 452168404Spjd dmu_object_info_t doi; 453168404Spjd dmu_object_info_from_db(dbp, &doi); 454168404Spjd ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS); 455168404Spjd } 456168404Spjd#endif 457168404Spjd 458168404Spjd mutex_enter(&spa->spa_history_lock); 459168404Spjd phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); 460168404Spjd 461168404Spjd if (*offp < shpp->sh_pool_create_len) { 462168404Spjd /* read in just the zpool create history */ 463168404Spjd phys_read_off = *offp; 464168404Spjd read_len = MIN(*len, shpp->sh_pool_create_len - 465168404Spjd phys_read_off); 466168404Spjd } else { 467168404Spjd /* 468168404Spjd * Need to reset passed in offset to BOF if the passed in 469168404Spjd * offset has since been overwritten. 470168404Spjd */ 471168404Spjd *offp = MAX(*offp, shpp->sh_bof); 472168404Spjd phys_read_off = spa_history_log_to_phys(*offp, shpp); 473168404Spjd 474168404Spjd /* 475168404Spjd * Read up to the minimum of what the user passed down or 476168404Spjd * the EOF (physical or logical). If we hit physical EOF, 477168404Spjd * use 'leftover' to read from the physical BOF. 478168404Spjd */ 479168404Spjd if (phys_read_off <= phys_eof) { 480168404Spjd read_len = MIN(*len, phys_eof - phys_read_off); 481168404Spjd } else { 482168404Spjd read_len = MIN(*len, 483168404Spjd shpp->sh_phys_max_off - phys_read_off); 484168404Spjd if (phys_read_off + *len > shpp->sh_phys_max_off) { 485168404Spjd leftover = MIN(*len - read_len, 486168404Spjd phys_eof - shpp->sh_pool_create_len); 487168404Spjd } 488168404Spjd } 489168404Spjd } 490168404Spjd 491168404Spjd /* offset for consumer to use next */ 492168404Spjd *offp += read_len + leftover; 493168404Spjd 494168404Spjd /* tell the consumer how much you actually read */ 495168404Spjd *len = read_len + leftover; 496168404Spjd 497168404Spjd if (read_len == 0) { 498168404Spjd mutex_exit(&spa->spa_history_lock); 499168404Spjd dmu_buf_rele(dbp, FTAG); 500168404Spjd return (0); 501168404Spjd } 502168404Spjd 503209962Smm err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf, 504209962Smm DMU_READ_PREFETCH); 505168404Spjd if (leftover && err == 0) { 506168404Spjd err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len, 507209962Smm leftover, buf + read_len, DMU_READ_PREFETCH); 508168404Spjd } 509168404Spjd mutex_exit(&spa->spa_history_lock); 510168404Spjd 511168404Spjd dmu_buf_rele(dbp, FTAG); 512168404Spjd return (err); 513168404Spjd} 514185029Spjd 515248571Smm/* 516248571Smm * The nvlist will be consumed by this call. 517248571Smm */ 518219089Spjdstatic void 519248571Smmlog_internal(nvlist_t *nvl, const char *operation, spa_t *spa, 520219089Spjd dmu_tx_t *tx, const char *fmt, va_list adx) 521185029Spjd{ 522248571Smm char *msg; 523219089Spjd va_list adx2; 524185029Spjd 525185029Spjd /* 526185029Spjd * If this is part of creating a pool, not everything is 527185029Spjd * initialized yet, so don't bother logging the internal events. 528240133Smm * Likewise if the pool is not writeable. 529185029Spjd */ 530248571Smm if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) { 531248571Smm fnvlist_free(nvl); 532185029Spjd return; 533248571Smm } 534185029Spjd 535219089Spjd va_copy(adx2, adx); 536185029Spjd 537248571Smm msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP); 538248571Smm (void) vsprintf(msg, fmt, adx2); 539248571Smm fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); 540248571Smm strfree(msg); 541185029Spjd 542219089Spjd va_end(adx2); 543185029Spjd 544248571Smm fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation); 545248571Smm fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg); 546219089Spjd 547185029Spjd if (dmu_tx_is_syncing(tx)) { 548248571Smm spa_history_log_sync(nvl, tx); 549185029Spjd } else { 550248571Smm dsl_sync_task_nowait(spa_get_dsl(spa), 551268473Sdelphij spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx); 552185029Spjd } 553248571Smm /* spa_history_log_sync() will free nvl */ 554185029Spjd} 555219089Spjd 556219089Spjdvoid 557248571Smmspa_history_log_internal(spa_t *spa, const char *operation, 558219089Spjd dmu_tx_t *tx, const char *fmt, ...) 559219089Spjd{ 560219089Spjd dmu_tx_t *htx = tx; 561219089Spjd va_list adx; 562219089Spjd 563219089Spjd /* create a tx if we didn't get one */ 564219089Spjd if (tx == NULL) { 565219089Spjd htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 566219089Spjd if (dmu_tx_assign(htx, TXG_WAIT) != 0) { 567219089Spjd dmu_tx_abort(htx); 568219089Spjd return; 569219089Spjd } 570219089Spjd } 571219089Spjd 572219089Spjd va_start(adx, fmt); 573248571Smm log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); 574219089Spjd va_end(adx); 575219089Spjd 576219089Spjd /* if we didn't get a tx from the caller, commit the one we made */ 577219089Spjd if (tx == NULL) 578219089Spjd dmu_tx_commit(htx); 579219089Spjd} 580219089Spjd 581219089Spjdvoid 582248571Smmspa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, 583248571Smm dmu_tx_t *tx, const char *fmt, ...) 584219089Spjd{ 585248571Smm va_list adx; 586307108Smav char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 587248571Smm nvlist_t *nvl = fnvlist_alloc(); 588219089Spjd 589248571Smm ASSERT(tx != NULL); 590248571Smm 591248571Smm dsl_dataset_name(ds, namebuf); 592248571Smm fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); 593248571Smm fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); 594248571Smm 595248571Smm va_start(adx, fmt); 596248571Smm log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx); 597248571Smm va_end(adx); 598219089Spjd} 599248571Smm 600248571Smmvoid 601248571Smmspa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, 602248571Smm dmu_tx_t *tx, const char *fmt, ...) 603248571Smm{ 604248571Smm va_list adx; 605307108Smav char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 606248571Smm nvlist_t *nvl = fnvlist_alloc(); 607248571Smm 608248571Smm ASSERT(tx != NULL); 609248571Smm 610248571Smm dsl_dir_name(dd, namebuf); 611248571Smm fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); 612248571Smm fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, 613275782Sdelphij dsl_dir_phys(dd)->dd_head_dataset_obj); 614248571Smm 615248571Smm va_start(adx, fmt); 616248571Smm log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); 617248571Smm va_end(adx); 618248571Smm} 619248571Smm 620248571Smmvoid 621248571Smmspa_history_log_version(spa_t *spa, const char *operation) 622248571Smm{ 623248571Smm spa_history_log_internal(spa, operation, NULL, 624329481Smav "pool version %llu; software version %llu/%llu; uts %s %s %s %s", 625248571Smm (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION, 626248571Smm utsname.nodename, utsname.release, utsname.version, 627248571Smm utsname.machine); 628248571Smm} 629