1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23290765Smav * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 24255750Sdelphij * Copyright (c) 2013, Joyent, Inc. All rights reserved. 25290757Smav * Copyright 2013 Saso Kiselkov. All rights reserved. 26168404Spjd */ 27168404Spjd 28168404Spjd#include <sys/zfs_context.h> 29168404Spjd#include <sys/spa.h> 30290757Smav#include <sys/spa_impl.h> 31168404Spjd#include <sys/zio.h> 32168404Spjd#include <sys/zio_checksum.h> 33219089Spjd#include <sys/zil.h> 34219089Spjd#include <zfs_fletcher.h> 35168404Spjd 36168404Spjd/* 37168404Spjd * Checksum vectors. 38168404Spjd * 39168404Spjd * In the SPA, everything is checksummed. We support checksum vectors 40168404Spjd * for three distinct reasons: 41168404Spjd * 42168404Spjd * 1. Different kinds of data need different levels of protection. 43168404Spjd * For SPA metadata, we always want a very strong checksum. 44168404Spjd * For user data, we let users make the trade-off between speed 45168404Spjd * and checksum strength. 46168404Spjd * 47168404Spjd * 2. Cryptographic hash and MAC algorithms are an area of active research. 48168404Spjd * It is likely that in future hash functions will be at least as strong 49168404Spjd * as current best-of-breed, and may be substantially faster as well. 50168404Spjd * We want the ability to take advantage of these new hashes as soon as 51168404Spjd * they become available. 52168404Spjd * 53168404Spjd * 3. If someone develops hardware that can compute a strong hash quickly, 54168404Spjd * we want the ability to take advantage of that hardware. 55168404Spjd * 56168404Spjd * Of course, we don't want a checksum upgrade to invalidate existing 57219089Spjd * data, so we store the checksum *function* in eight bits of the bp. 58219089Spjd * This gives us room for up to 256 different checksum functions. 59168404Spjd * 60168404Spjd * When writing a block, we always checksum it with the latest-and-greatest 61168404Spjd * checksum function of the appropriate strength. When reading a block, 62168404Spjd * we compare the expected checksum against the actual checksum, which we 63219089Spjd * compute via the checksum function specified by BP_GET_CHECKSUM(bp). 64290757Smav * 65290757Smav * SALTED CHECKSUMS 66290757Smav * 67290757Smav * To enable the use of less secure hash algorithms with dedup, we 68290757Smav * introduce the notion of salted checksums (MACs, really). A salted 69290757Smav * checksum is fed both a random 256-bit value (the salt) and the data 70290757Smav * to be checksummed. This salt is kept secret (stored on the pool, but 71290757Smav * never shown to the user). Thus even if an attacker knew of collision 72290757Smav * weaknesses in the hash algorithm, they won't be able to mount a known 73290757Smav * plaintext attack on the DDT, since the actual hash value cannot be 74290757Smav * known ahead of time. How the salt is used is algorithm-specific 75290757Smav * (some might simply prefix it to the data block, others might need to 76290757Smav * utilize a full-blown HMAC). On disk the salt is stored in a ZAP 77290757Smav * object in the MOS (DMU_POOL_CHECKSUM_SALT). 78290757Smav * 79290757Smav * CONTEXT TEMPLATES 80290757Smav * 81290757Smav * Some hashing algorithms need to perform a substantial amount of 82290757Smav * initialization work (e.g. salted checksums above may need to pre-hash 83290757Smav * the salt) before being able to process data. Performing this 84290757Smav * redundant work for each block would be wasteful, so we instead allow 85290757Smav * a checksum algorithm to do the work once (the first time it's used) 86290757Smav * and then keep this pre-initialized context as a template inside the 87290757Smav * spa_t (spa_cksum_tmpls). If the zio_checksum_info_t contains 88290757Smav * non-NULL ci_tmpl_init and ci_tmpl_free callbacks, they are used to 89290757Smav * construct and destruct the pre-initialized checksum context. The 90290757Smav * pre-initialized context is then reused during each checksum 91290757Smav * invocation and passed to the checksum function. 92168404Spjd */ 93168404Spjd 94168404Spjd/*ARGSUSED*/ 95168404Spjdstatic void 96290757Smavzio_checksum_off(const void *buf, uint64_t size, 97290757Smav const void *ctx_template, zio_cksum_t *zcp) 98168404Spjd{ 99168404Spjd ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 100168404Spjd} 101168404Spjd 102168404Spjdzio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 103290757Smav {{NULL, NULL}, NULL, NULL, 0, "inherit"}, 104290757Smav {{NULL, NULL}, NULL, NULL, 0, "on"}, 105290757Smav {{zio_checksum_off, zio_checksum_off}, 106290757Smav NULL, NULL, 0, "off"}, 107290757Smav {{zio_checksum_SHA256, zio_checksum_SHA256}, 108290757Smav NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, 109290757Smav "label"}, 110290757Smav {{zio_checksum_SHA256, zio_checksum_SHA256}, 111290757Smav NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, 112290757Smav "gang_header"}, 113290757Smav {{fletcher_2_native, fletcher_2_byteswap}, 114290757Smav NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"}, 115290757Smav {{fletcher_2_native, fletcher_2_byteswap}, 116290757Smav NULL, NULL, 0, "fletcher2"}, 117290757Smav {{fletcher_4_native, fletcher_4_byteswap}, 118290757Smav NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"}, 119290757Smav {{zio_checksum_SHA256, zio_checksum_SHA256}, 120290757Smav NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | 121290757Smav ZCHECKSUM_FLAG_NOPWRITE, "sha256"}, 122290757Smav {{fletcher_4_native, fletcher_4_byteswap}, 123290757Smav NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"}, 124290757Smav {{zio_checksum_off, zio_checksum_off}, 125290757Smav NULL, NULL, 0, "noparity"}, 126290757Smav#ifdef illumos 127290757Smav {{zio_checksum_SHA512_native, zio_checksum_SHA512_byteswap}, 128290757Smav NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | 129290757Smav ZCHECKSUM_FLAG_NOPWRITE, "sha512"}, 130290757Smav {{zio_checksum_skein_native, zio_checksum_skein_byteswap}, 131290757Smav zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free, 132290757Smav ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | 133290757Smav ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"}, 134290757Smav {{zio_checksum_edonr_native, zio_checksum_edonr_byteswap}, 135290757Smav zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free, 136290757Smav ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED | 137290757Smav ZCHECKSUM_FLAG_NOPWRITE, "edonr"}, 138290757Smav#endif 139168404Spjd}; 140168404Spjd 141297114Smav/* 142297114Smav * The flag corresponding to the "verify" in dedup=[checksum,]verify 143297114Smav * must be cleared first, so callers should use ZIO_CHECKSUM_MASK. 144297114Smav */ 145290757Smavspa_feature_t 146290757Smavzio_checksum_to_feature(enum zio_checksum cksum) 147290757Smav{ 148290757Smav#ifdef illumos 149297114Smav VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0); 150297114Smav 151290757Smav switch (cksum) { 152290757Smav case ZIO_CHECKSUM_SHA512: 153290757Smav return (SPA_FEATURE_SHA512); 154290757Smav case ZIO_CHECKSUM_SKEIN: 155290757Smav return (SPA_FEATURE_SKEIN); 156290757Smav case ZIO_CHECKSUM_EDONR: 157290757Smav return (SPA_FEATURE_EDONR); 158290757Smav } 159290757Smav#endif 160290757Smav return (SPA_FEATURE_NONE); 161290757Smav} 162290757Smav 163219089Spjdenum zio_checksum 164219089Spjdzio_checksum_select(enum zio_checksum child, enum zio_checksum parent) 165168404Spjd{ 166168404Spjd ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); 167168404Spjd ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); 168168404Spjd ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 169168404Spjd 170168404Spjd if (child == ZIO_CHECKSUM_INHERIT) 171168404Spjd return (parent); 172168404Spjd 173168404Spjd if (child == ZIO_CHECKSUM_ON) 174168404Spjd return (ZIO_CHECKSUM_ON_VALUE); 175168404Spjd 176168404Spjd return (child); 177168404Spjd} 178168404Spjd 179219089Spjdenum zio_checksum 180219089Spjdzio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, 181219089Spjd enum zio_checksum parent) 182219089Spjd{ 183219089Spjd ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 184219089Spjd ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 185219089Spjd ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 186219089Spjd 187219089Spjd if (child == ZIO_CHECKSUM_INHERIT) 188219089Spjd return (parent); 189219089Spjd 190219089Spjd if (child == ZIO_CHECKSUM_ON) 191219089Spjd return (spa_dedup_checksum(spa)); 192219089Spjd 193219089Spjd if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) 194219089Spjd return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); 195219089Spjd 196290757Smav ASSERT((zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_flags & 197290757Smav ZCHECKSUM_FLAG_DEDUP) || 198219089Spjd (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); 199219089Spjd 200219089Spjd return (child); 201219089Spjd} 202219089Spjd 203168404Spjd/* 204185029Spjd * Set the external verifier for a gang block based on <vdev, offset, txg>, 205185029Spjd * a tuple which is guaranteed to be unique for the life of the pool. 206185029Spjd */ 207185029Spjdstatic void 208185029Spjdzio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) 209185029Spjd{ 210185029Spjd dva_t *dva = BP_IDENTITY(bp); 211219089Spjd uint64_t txg = BP_PHYSICAL_BIRTH(bp); 212185029Spjd 213185029Spjd ASSERT(BP_IS_GANG(bp)); 214185029Spjd 215185029Spjd ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); 216185029Spjd} 217185029Spjd 218185029Spjd/* 219185029Spjd * Set the external verifier for a label block based on its offset. 220185029Spjd * The vdev is implicit, and the txg is unknowable at pool open time -- 221185029Spjd * hence the logic in vdev_uberblock_load() to find the most recent copy. 222185029Spjd */ 223185029Spjdstatic void 224185029Spjdzio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) 225185029Spjd{ 226185029Spjd ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); 227185029Spjd} 228185029Spjd 229185029Spjd/* 230290757Smav * Calls the template init function of a checksum which supports context 231290757Smav * templates and installs the template into the spa_t. 232290757Smav */ 233290757Smavstatic void 234290757Smavzio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) 235290757Smav{ 236290757Smav zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 237290757Smav 238290757Smav if (ci->ci_tmpl_init == NULL) 239290757Smav return; 240290757Smav if (spa->spa_cksum_tmpls[checksum] != NULL) 241290757Smav return; 242290757Smav 243290757Smav VERIFY(ci->ci_tmpl_free != NULL); 244290757Smav mutex_enter(&spa->spa_cksum_tmpls_lock); 245290757Smav if (spa->spa_cksum_tmpls[checksum] == NULL) { 246290757Smav spa->spa_cksum_tmpls[checksum] = 247290757Smav ci->ci_tmpl_init(&spa->spa_cksum_salt); 248290757Smav VERIFY(spa->spa_cksum_tmpls[checksum] != NULL); 249290757Smav } 250290757Smav mutex_exit(&spa->spa_cksum_tmpls_lock); 251290757Smav} 252290757Smav 253290757Smav/* 254168404Spjd * Generate the checksum. 255168404Spjd */ 256168404Spjdvoid 257185029Spjdzio_checksum_compute(zio_t *zio, enum zio_checksum checksum, 258290765Smav void *data, uint64_t size) 259168404Spjd{ 260185029Spjd blkptr_t *bp = zio->io_bp; 261185029Spjd uint64_t offset = zio->io_offset; 262168404Spjd zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 263219089Spjd zio_cksum_t cksum; 264290757Smav spa_t *spa = zio->io_spa; 265168404Spjd 266185029Spjd ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); 267168404Spjd ASSERT(ci->ci_func[0] != NULL); 268168404Spjd 269290757Smav zio_checksum_template_init(checksum, spa); 270290757Smav 271290757Smav if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { 272219089Spjd zio_eck_t *eck; 273219089Spjd 274219089Spjd if (checksum == ZIO_CHECKSUM_ZILOG2) { 275219089Spjd zil_chain_t *zilc = data; 276219089Spjd 277219089Spjd size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, 278219089Spjd uint64_t); 279219089Spjd eck = &zilc->zc_eck; 280219089Spjd } else { 281219089Spjd eck = (zio_eck_t *)((char *)data + size) - 1; 282219089Spjd } 283185029Spjd if (checksum == ZIO_CHECKSUM_GANG_HEADER) 284219089Spjd zio_checksum_gang_verifier(&eck->zec_cksum, bp); 285185029Spjd else if (checksum == ZIO_CHECKSUM_LABEL) 286219089Spjd zio_checksum_label_verifier(&eck->zec_cksum, offset); 287185029Spjd else 288219089Spjd bp->blk_cksum = eck->zec_cksum; 289219089Spjd eck->zec_magic = ZEC_MAGIC; 290290757Smav ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], 291290757Smav &cksum); 292219089Spjd eck->zec_cksum = cksum; 293168404Spjd } else { 294290757Smav ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], 295290757Smav &bp->blk_cksum); 296168404Spjd } 297168404Spjd} 298168404Spjd 299168404Spjdint 300307266Smavzio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, 301307266Smav void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) 302168404Spjd{ 303307266Smav zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 304307266Smav zio_cksum_t actual_cksum, expected_cksum; 305185029Spjd int byteswap; 306168404Spjd 307168404Spjd if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) 308249195Smm return (SET_ERROR(EINVAL)); 309168404Spjd 310290757Smav zio_checksum_template_init(checksum, spa); 311290757Smav 312290757Smav if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { 313219089Spjd zio_eck_t *eck; 314307266Smav zio_cksum_t verifier; 315219089Spjd 316219089Spjd if (checksum == ZIO_CHECKSUM_ZILOG2) { 317219089Spjd zil_chain_t *zilc = data; 318219089Spjd uint64_t nused; 319219089Spjd 320219089Spjd eck = &zilc->zc_eck; 321219089Spjd if (eck->zec_magic == ZEC_MAGIC) 322219089Spjd nused = zilc->zc_nused; 323219089Spjd else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) 324219089Spjd nused = BSWAP_64(zilc->zc_nused); 325219089Spjd else 326249195Smm return (SET_ERROR(ECKSUM)); 327219089Spjd 328219089Spjd if (nused > size) 329249195Smm return (SET_ERROR(ECKSUM)); 330219089Spjd 331219089Spjd size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); 332219089Spjd } else { 333219089Spjd eck = (zio_eck_t *)((char *)data + size) - 1; 334219089Spjd } 335219089Spjd 336168404Spjd if (checksum == ZIO_CHECKSUM_GANG_HEADER) 337185029Spjd zio_checksum_gang_verifier(&verifier, bp); 338185029Spjd else if (checksum == ZIO_CHECKSUM_LABEL) 339185029Spjd zio_checksum_label_verifier(&verifier, offset); 340185029Spjd else 341185029Spjd verifier = bp->blk_cksum; 342168404Spjd 343219089Spjd byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); 344185029Spjd 345185029Spjd if (byteswap) 346185029Spjd byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 347185029Spjd 348219089Spjd expected_cksum = eck->zec_cksum; 349219089Spjd eck->zec_cksum = verifier; 350290757Smav ci->ci_func[byteswap](data, size, 351290757Smav spa->spa_cksum_tmpls[checksum], &actual_cksum); 352219089Spjd eck->zec_cksum = expected_cksum; 353185029Spjd 354307266Smav if (byteswap) { 355168404Spjd byteswap_uint64_array(&expected_cksum, 356168404Spjd sizeof (zio_cksum_t)); 357307266Smav } 358168404Spjd } else { 359185029Spjd byteswap = BP_SHOULD_BYTESWAP(bp); 360185029Spjd expected_cksum = bp->blk_cksum; 361290757Smav ci->ci_func[byteswap](data, size, 362290757Smav spa->spa_cksum_tmpls[checksum], &actual_cksum); 363168404Spjd } 364168404Spjd 365307266Smav if (info != NULL) { 366307266Smav info->zbc_expected = expected_cksum; 367307266Smav info->zbc_actual = actual_cksum; 368307266Smav info->zbc_checksum_name = ci->ci_name; 369307266Smav info->zbc_byteswapped = byteswap; 370307266Smav info->zbc_injected = 0; 371307266Smav info->zbc_has_cksum = 1; 372307266Smav } 373219089Spjd 374185029Spjd if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 375249195Smm return (SET_ERROR(ECKSUM)); 376168404Spjd 377307266Smav return (0); 378307266Smav} 379307266Smav 380307266Smavint 381307266Smavzio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) 382307266Smav{ 383307266Smav blkptr_t *bp = zio->io_bp; 384307266Smav uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : 385307266Smav (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); 386307266Smav int error; 387307266Smav uint64_t size = (bp == NULL ? zio->io_size : 388307266Smav (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); 389307266Smav uint64_t offset = zio->io_offset; 390307266Smav void *data = zio->io_data; 391307266Smav spa_t *spa = zio->io_spa; 392307266Smav 393307266Smav error = zio_checksum_error_impl(spa, bp, checksum, data, size, 394307266Smav offset, info); 395307266Smav if (error != 0 && zio_injection_enabled && !zio->io_error && 396219089Spjd (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { 397168404Spjd 398219089Spjd info->zbc_injected = 1; 399219089Spjd return (error); 400219089Spjd } 401307266Smav return (error); 402168404Spjd} 403290757Smav 404290757Smav/* 405290757Smav * Called by a spa_t that's about to be deallocated. This steps through 406290757Smav * all of the checksum context templates and deallocates any that were 407290757Smav * initialized using the algorithm-specific template init function. 408290757Smav */ 409290757Smavvoid 410290757Smavzio_checksum_templates_free(spa_t *spa) 411290757Smav{ 412290757Smav for (enum zio_checksum checksum = 0; 413290757Smav checksum < ZIO_CHECKSUM_FUNCTIONS; checksum++) { 414290757Smav if (spa->spa_cksum_tmpls[checksum] != NULL) { 415290757Smav zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 416290757Smav 417290757Smav VERIFY(ci->ci_tmpl_free != NULL); 418290757Smav ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]); 419290757Smav spa->spa_cksum_tmpls[checksum] = NULL; 420290757Smav } 421290757Smav } 422290757Smav} 423