1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23249643Smm * Copyright (c) 2013 by Delphix. All rights reserved. 24262089Savg * Copyright (c) 2013, Joyent, Inc. All rights reserved. 25168404Spjd */ 26168404Spjd 27168404Spjd#include <sys/zfs_context.h> 28168404Spjd#include <sys/spa.h> 29168404Spjd#include <sys/zio.h> 30168404Spjd#include <sys/zio_checksum.h> 31219089Spjd#include <sys/zil.h> 32219089Spjd#include <zfs_fletcher.h> 33168404Spjd 34168404Spjd/* 35168404Spjd * Checksum vectors. 36168404Spjd * 37168404Spjd * In the SPA, everything is checksummed. We support checksum vectors 38168404Spjd * for three distinct reasons: 39168404Spjd * 40168404Spjd * 1. Different kinds of data need different levels of protection. 41168404Spjd * For SPA metadata, we always want a very strong checksum. 42168404Spjd * For user data, we let users make the trade-off between speed 43168404Spjd * and checksum strength. 44168404Spjd * 45168404Spjd * 2. Cryptographic hash and MAC algorithms are an area of active research. 46168404Spjd * It is likely that in future hash functions will be at least as strong 47168404Spjd * as current best-of-breed, and may be substantially faster as well. 48168404Spjd * We want the ability to take advantage of these new hashes as soon as 49168404Spjd * they become available. 50168404Spjd * 51168404Spjd * 3. If someone develops hardware that can compute a strong hash quickly, 52168404Spjd * we want the ability to take advantage of that hardware. 53168404Spjd * 54168404Spjd * Of course, we don't want a checksum upgrade to invalidate existing 55219089Spjd * data, so we store the checksum *function* in eight bits of the bp. 56219089Spjd * This gives us room for up to 256 different checksum functions. 57168404Spjd * 58168404Spjd * When writing a block, we always checksum it with the latest-and-greatest 59168404Spjd * checksum function of the appropriate strength. When reading a block, 60168404Spjd * we compare the expected checksum against the actual checksum, which we 61219089Spjd * compute via the checksum function specified by BP_GET_CHECKSUM(bp). 62168404Spjd */ 63168404Spjd 64168404Spjd/*ARGSUSED*/ 65168404Spjdstatic void 66168404Spjdzio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 67168404Spjd{ 68168404Spjd ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 69168404Spjd} 70168404Spjd 71168404Spjdzio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 72219089Spjd {{NULL, NULL}, 0, 0, 0, "inherit"}, 73219089Spjd {{NULL, NULL}, 0, 0, 0, "on"}, 74219089Spjd {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, 75219089Spjd {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, 76219089Spjd {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, 77219089Spjd {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, 78219089Spjd {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, 79219089Spjd {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, 80219089Spjd {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, 81219089Spjd {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, 82262089Savg {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "noparity"}, 83168404Spjd}; 84168404Spjd 85219089Spjdenum zio_checksum 86219089Spjdzio_checksum_select(enum zio_checksum child, enum zio_checksum parent) 87168404Spjd{ 88168404Spjd ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); 89168404Spjd ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); 90168404Spjd ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 91168404Spjd 92168404Spjd if (child == ZIO_CHECKSUM_INHERIT) 93168404Spjd return (parent); 94168404Spjd 95168404Spjd if (child == ZIO_CHECKSUM_ON) 96168404Spjd return (ZIO_CHECKSUM_ON_VALUE); 97168404Spjd 98168404Spjd return (child); 99168404Spjd} 100168404Spjd 101219089Spjdenum zio_checksum 102219089Spjdzio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, 103219089Spjd enum zio_checksum parent) 104219089Spjd{ 105219089Spjd ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 106219089Spjd ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 107219089Spjd ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 108219089Spjd 109219089Spjd if (child == ZIO_CHECKSUM_INHERIT) 110219089Spjd return (parent); 111219089Spjd 112219089Spjd if (child == ZIO_CHECKSUM_ON) 113219089Spjd return (spa_dedup_checksum(spa)); 114219089Spjd 115219089Spjd if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) 116219089Spjd return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); 117219089Spjd 118219089Spjd ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || 119219089Spjd (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); 120219089Spjd 121219089Spjd return (child); 122219089Spjd} 123219089Spjd 124168404Spjd/* 125185029Spjd * Set the external verifier for a gang block based on <vdev, offset, txg>, 126185029Spjd * a tuple which is guaranteed to be unique for the life of the pool. 127185029Spjd */ 128185029Spjdstatic void 129185029Spjdzio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) 130185029Spjd{ 131185029Spjd dva_t *dva = BP_IDENTITY(bp); 132219089Spjd uint64_t txg = BP_PHYSICAL_BIRTH(bp); 133185029Spjd 134185029Spjd ASSERT(BP_IS_GANG(bp)); 135185029Spjd 136185029Spjd ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); 137185029Spjd} 138185029Spjd 139185029Spjd/* 140185029Spjd * Set the external verifier for a label block based on its offset. 141185029Spjd * The vdev is implicit, and the txg is unknowable at pool open time -- 142185029Spjd * hence the logic in vdev_uberblock_load() to find the most recent copy. 143185029Spjd */ 144185029Spjdstatic void 145185029Spjdzio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) 146185029Spjd{ 147185029Spjd ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); 148185029Spjd} 149185029Spjd 150185029Spjd/* 151168404Spjd * Generate the checksum. 152168404Spjd */ 153168404Spjdvoid 154185029Spjdzio_checksum_compute(zio_t *zio, enum zio_checksum checksum, 155185029Spjd void *data, uint64_t size) 156168404Spjd{ 157185029Spjd blkptr_t *bp = zio->io_bp; 158185029Spjd uint64_t offset = zio->io_offset; 159168404Spjd zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 160219089Spjd zio_cksum_t cksum; 161168404Spjd 162185029Spjd ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); 163168404Spjd ASSERT(ci->ci_func[0] != NULL); 164168404Spjd 165219089Spjd if (ci->ci_eck) { 166219089Spjd zio_eck_t *eck; 167219089Spjd 168219089Spjd if (checksum == ZIO_CHECKSUM_ZILOG2) { 169219089Spjd zil_chain_t *zilc = data; 170219089Spjd 171219089Spjd size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, 172219089Spjd uint64_t); 173219089Spjd eck = &zilc->zc_eck; 174219089Spjd } else { 175219089Spjd eck = (zio_eck_t *)((char *)data + size) - 1; 176219089Spjd } 177185029Spjd if (checksum == ZIO_CHECKSUM_GANG_HEADER) 178219089Spjd zio_checksum_gang_verifier(&eck->zec_cksum, bp); 179185029Spjd else if (checksum == ZIO_CHECKSUM_LABEL) 180219089Spjd zio_checksum_label_verifier(&eck->zec_cksum, offset); 181185029Spjd else 182219089Spjd bp->blk_cksum = eck->zec_cksum; 183219089Spjd eck->zec_magic = ZEC_MAGIC; 184219089Spjd ci->ci_func[0](data, size, &cksum); 185219089Spjd eck->zec_cksum = cksum; 186168404Spjd } else { 187185029Spjd ci->ci_func[0](data, size, &bp->blk_cksum); 188168404Spjd } 189168404Spjd} 190168404Spjd 191168404Spjdint 192219089Spjdzio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) 193168404Spjd{ 194168404Spjd blkptr_t *bp = zio->io_bp; 195185029Spjd uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : 196185029Spjd (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); 197185029Spjd int byteswap; 198219089Spjd int error; 199185029Spjd uint64_t size = (bp == NULL ? zio->io_size : 200185029Spjd (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); 201185029Spjd uint64_t offset = zio->io_offset; 202219089Spjd void *data = zio->io_data; 203168404Spjd zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 204185029Spjd zio_cksum_t actual_cksum, expected_cksum, verifier; 205168404Spjd 206168404Spjd if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) 207249643Smm return (SET_ERROR(EINVAL)); 208168404Spjd 209219089Spjd if (ci->ci_eck) { 210219089Spjd zio_eck_t *eck; 211219089Spjd 212219089Spjd if (checksum == ZIO_CHECKSUM_ZILOG2) { 213219089Spjd zil_chain_t *zilc = data; 214219089Spjd uint64_t nused; 215219089Spjd 216219089Spjd eck = &zilc->zc_eck; 217219089Spjd if (eck->zec_magic == ZEC_MAGIC) 218219089Spjd nused = zilc->zc_nused; 219219089Spjd else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) 220219089Spjd nused = BSWAP_64(zilc->zc_nused); 221219089Spjd else 222249643Smm return (SET_ERROR(ECKSUM)); 223219089Spjd 224219089Spjd if (nused > size) 225249643Smm return (SET_ERROR(ECKSUM)); 226219089Spjd 227219089Spjd size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); 228219089Spjd } else { 229219089Spjd eck = (zio_eck_t *)((char *)data + size) - 1; 230219089Spjd } 231219089Spjd 232168404Spjd if (checksum == ZIO_CHECKSUM_GANG_HEADER) 233185029Spjd zio_checksum_gang_verifier(&verifier, bp); 234185029Spjd else if (checksum == ZIO_CHECKSUM_LABEL) 235185029Spjd zio_checksum_label_verifier(&verifier, offset); 236185029Spjd else 237185029Spjd verifier = bp->blk_cksum; 238168404Spjd 239219089Spjd byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); 240185029Spjd 241185029Spjd if (byteswap) 242185029Spjd byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 243185029Spjd 244219089Spjd expected_cksum = eck->zec_cksum; 245219089Spjd eck->zec_cksum = verifier; 246185029Spjd ci->ci_func[byteswap](data, size, &actual_cksum); 247219089Spjd eck->zec_cksum = expected_cksum; 248185029Spjd 249185029Spjd if (byteswap) 250168404Spjd byteswap_uint64_array(&expected_cksum, 251168404Spjd sizeof (zio_cksum_t)); 252168404Spjd } else { 253168404Spjd ASSERT(!BP_IS_GANG(bp)); 254185029Spjd byteswap = BP_SHOULD_BYTESWAP(bp); 255185029Spjd expected_cksum = bp->blk_cksum; 256168404Spjd ci->ci_func[byteswap](data, size, &actual_cksum); 257168404Spjd } 258168404Spjd 259219089Spjd info->zbc_expected = expected_cksum; 260219089Spjd info->zbc_actual = actual_cksum; 261219089Spjd info->zbc_checksum_name = ci->ci_name; 262219089Spjd info->zbc_byteswapped = byteswap; 263219089Spjd info->zbc_injected = 0; 264219089Spjd info->zbc_has_cksum = 1; 265219089Spjd 266185029Spjd if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 267249643Smm return (SET_ERROR(ECKSUM)); 268168404Spjd 269219089Spjd if (zio_injection_enabled && !zio->io_error && 270219089Spjd (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { 271168404Spjd 272219089Spjd info->zbc_injected = 1; 273219089Spjd return (error); 274219089Spjd } 275219089Spjd 276168404Spjd return (0); 277168404Spjd} 278