zio_checksum.c revision 219089
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23168404Spjd */ 24168404Spjd 25168404Spjd#include <sys/zfs_context.h> 26168404Spjd#include <sys/spa.h> 27168404Spjd#include <sys/zio.h> 28168404Spjd#include <sys/zio_checksum.h> 29219089Spjd#include <sys/zil.h> 30219089Spjd#include <zfs_fletcher.h> 31168404Spjd 32168404Spjd/* 33168404Spjd * Checksum vectors. 34168404Spjd * 35168404Spjd * In the SPA, everything is checksummed. We support checksum vectors 36168404Spjd * for three distinct reasons: 37168404Spjd * 38168404Spjd * 1. Different kinds of data need different levels of protection. 39168404Spjd * For SPA metadata, we always want a very strong checksum. 40168404Spjd * For user data, we let users make the trade-off between speed 41168404Spjd * and checksum strength. 42168404Spjd * 43168404Spjd * 2. Cryptographic hash and MAC algorithms are an area of active research. 44168404Spjd * It is likely that in future hash functions will be at least as strong 45168404Spjd * as current best-of-breed, and may be substantially faster as well. 46168404Spjd * We want the ability to take advantage of these new hashes as soon as 47168404Spjd * they become available. 48168404Spjd * 49168404Spjd * 3. If someone develops hardware that can compute a strong hash quickly, 50168404Spjd * we want the ability to take advantage of that hardware. 51168404Spjd * 52168404Spjd * Of course, we don't want a checksum upgrade to invalidate existing 53219089Spjd * data, so we store the checksum *function* in eight bits of the bp. 54219089Spjd * This gives us room for up to 256 different checksum functions. 55168404Spjd * 56168404Spjd * When writing a block, we always checksum it with the latest-and-greatest 57168404Spjd * checksum function of the appropriate strength. When reading a block, 58168404Spjd * we compare the expected checksum against the actual checksum, which we 59219089Spjd * compute via the checksum function specified by BP_GET_CHECKSUM(bp). 60168404Spjd */ 61168404Spjd 62168404Spjd/*ARGSUSED*/ 63168404Spjdstatic void 64168404Spjdzio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 65168404Spjd{ 66168404Spjd ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 67168404Spjd} 68168404Spjd 69168404Spjdzio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 70219089Spjd {{NULL, NULL}, 0, 0, 0, "inherit"}, 71219089Spjd {{NULL, NULL}, 0, 0, 0, "on"}, 72219089Spjd {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, 73219089Spjd {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, 74219089Spjd {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, 75219089Spjd {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, 76219089Spjd {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, 77219089Spjd {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, 78219089Spjd {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, 79219089Spjd {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, 80168404Spjd}; 81168404Spjd 82219089Spjdenum zio_checksum 83219089Spjdzio_checksum_select(enum zio_checksum child, enum zio_checksum parent) 84168404Spjd{ 85168404Spjd ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); 86168404Spjd ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); 87168404Spjd ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 88168404Spjd 89168404Spjd if (child == ZIO_CHECKSUM_INHERIT) 90168404Spjd return (parent); 91168404Spjd 92168404Spjd if (child == ZIO_CHECKSUM_ON) 93168404Spjd return (ZIO_CHECKSUM_ON_VALUE); 94168404Spjd 95168404Spjd return (child); 96168404Spjd} 97168404Spjd 98219089Spjdenum zio_checksum 99219089Spjdzio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, 100219089Spjd enum zio_checksum parent) 101219089Spjd{ 102219089Spjd ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 103219089Spjd ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 104219089Spjd ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 105219089Spjd 106219089Spjd if (child == ZIO_CHECKSUM_INHERIT) 107219089Spjd return (parent); 108219089Spjd 109219089Spjd if (child == ZIO_CHECKSUM_ON) 110219089Spjd return (spa_dedup_checksum(spa)); 111219089Spjd 112219089Spjd if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) 113219089Spjd return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); 114219089Spjd 115219089Spjd ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || 116219089Spjd (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); 117219089Spjd 118219089Spjd return (child); 119219089Spjd} 120219089Spjd 121168404Spjd/* 122185029Spjd * Set the external verifier for a gang block based on <vdev, offset, txg>, 123185029Spjd * a tuple which is guaranteed to be unique for the life of the pool. 124185029Spjd */ 125185029Spjdstatic void 126185029Spjdzio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) 127185029Spjd{ 128185029Spjd dva_t *dva = BP_IDENTITY(bp); 129219089Spjd uint64_t txg = BP_PHYSICAL_BIRTH(bp); 130185029Spjd 131185029Spjd ASSERT(BP_IS_GANG(bp)); 132185029Spjd 133185029Spjd ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); 134185029Spjd} 135185029Spjd 136185029Spjd/* 137185029Spjd * Set the external verifier for a label block based on its offset. 138185029Spjd * The vdev is implicit, and the txg is unknowable at pool open time -- 139185029Spjd * hence the logic in vdev_uberblock_load() to find the most recent copy. 140185029Spjd */ 141185029Spjdstatic void 142185029Spjdzio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) 143185029Spjd{ 144185029Spjd ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); 145185029Spjd} 146185029Spjd 147185029Spjd/* 148168404Spjd * Generate the checksum. 149168404Spjd */ 150168404Spjdvoid 151185029Spjdzio_checksum_compute(zio_t *zio, enum zio_checksum checksum, 152185029Spjd void *data, uint64_t size) 153168404Spjd{ 154185029Spjd blkptr_t *bp = zio->io_bp; 155185029Spjd uint64_t offset = zio->io_offset; 156168404Spjd zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 157219089Spjd zio_cksum_t cksum; 158168404Spjd 159185029Spjd ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); 160168404Spjd ASSERT(ci->ci_func[0] != NULL); 161168404Spjd 162219089Spjd if (ci->ci_eck) { 163219089Spjd zio_eck_t *eck; 164219089Spjd 165219089Spjd if (checksum == ZIO_CHECKSUM_ZILOG2) { 166219089Spjd zil_chain_t *zilc = data; 167219089Spjd 168219089Spjd size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, 169219089Spjd uint64_t); 170219089Spjd eck = &zilc->zc_eck; 171219089Spjd } else { 172219089Spjd eck = (zio_eck_t *)((char *)data + size) - 1; 173219089Spjd } 174185029Spjd if (checksum == ZIO_CHECKSUM_GANG_HEADER) 175219089Spjd zio_checksum_gang_verifier(&eck->zec_cksum, bp); 176185029Spjd else if (checksum == ZIO_CHECKSUM_LABEL) 177219089Spjd zio_checksum_label_verifier(&eck->zec_cksum, offset); 178185029Spjd else 179219089Spjd bp->blk_cksum = eck->zec_cksum; 180219089Spjd eck->zec_magic = ZEC_MAGIC; 181219089Spjd ci->ci_func[0](data, size, &cksum); 182219089Spjd eck->zec_cksum = cksum; 183168404Spjd } else { 184185029Spjd ci->ci_func[0](data, size, &bp->blk_cksum); 185168404Spjd } 186168404Spjd} 187168404Spjd 188168404Spjdint 189219089Spjdzio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) 190168404Spjd{ 191168404Spjd blkptr_t *bp = zio->io_bp; 192185029Spjd uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : 193185029Spjd (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); 194185029Spjd int byteswap; 195219089Spjd int error; 196185029Spjd uint64_t size = (bp == NULL ? zio->io_size : 197185029Spjd (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); 198185029Spjd uint64_t offset = zio->io_offset; 199219089Spjd void *data = zio->io_data; 200168404Spjd zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 201185029Spjd zio_cksum_t actual_cksum, expected_cksum, verifier; 202168404Spjd 203168404Spjd if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) 204168404Spjd return (EINVAL); 205168404Spjd 206219089Spjd if (ci->ci_eck) { 207219089Spjd zio_eck_t *eck; 208219089Spjd 209219089Spjd if (checksum == ZIO_CHECKSUM_ZILOG2) { 210219089Spjd zil_chain_t *zilc = data; 211219089Spjd uint64_t nused; 212219089Spjd 213219089Spjd eck = &zilc->zc_eck; 214219089Spjd if (eck->zec_magic == ZEC_MAGIC) 215219089Spjd nused = zilc->zc_nused; 216219089Spjd else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) 217219089Spjd nused = BSWAP_64(zilc->zc_nused); 218219089Spjd else 219219089Spjd return (ECKSUM); 220219089Spjd 221219089Spjd if (nused > size) 222219089Spjd return (ECKSUM); 223219089Spjd 224219089Spjd size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); 225219089Spjd } else { 226219089Spjd eck = (zio_eck_t *)((char *)data + size) - 1; 227219089Spjd } 228219089Spjd 229168404Spjd if (checksum == ZIO_CHECKSUM_GANG_HEADER) 230185029Spjd zio_checksum_gang_verifier(&verifier, bp); 231185029Spjd else if (checksum == ZIO_CHECKSUM_LABEL) 232185029Spjd zio_checksum_label_verifier(&verifier, offset); 233185029Spjd else 234185029Spjd verifier = bp->blk_cksum; 235168404Spjd 236219089Spjd byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); 237185029Spjd 238185029Spjd if (byteswap) 239185029Spjd byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 240185029Spjd 241219089Spjd expected_cksum = eck->zec_cksum; 242219089Spjd eck->zec_cksum = verifier; 243185029Spjd ci->ci_func[byteswap](data, size, &actual_cksum); 244219089Spjd eck->zec_cksum = expected_cksum; 245185029Spjd 246185029Spjd if (byteswap) 247168404Spjd byteswap_uint64_array(&expected_cksum, 248168404Spjd sizeof (zio_cksum_t)); 249168404Spjd } else { 250168404Spjd ASSERT(!BP_IS_GANG(bp)); 251185029Spjd byteswap = BP_SHOULD_BYTESWAP(bp); 252185029Spjd expected_cksum = bp->blk_cksum; 253168404Spjd ci->ci_func[byteswap](data, size, &actual_cksum); 254168404Spjd } 255168404Spjd 256219089Spjd info->zbc_expected = expected_cksum; 257219089Spjd info->zbc_actual = actual_cksum; 258219089Spjd info->zbc_checksum_name = ci->ci_name; 259219089Spjd info->zbc_byteswapped = byteswap; 260219089Spjd info->zbc_injected = 0; 261219089Spjd info->zbc_has_cksum = 1; 262219089Spjd 263185029Spjd if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 264168404Spjd return (ECKSUM); 265168404Spjd 266219089Spjd if (zio_injection_enabled && !zio->io_error && 267219089Spjd (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { 268168404Spjd 269219089Spjd info->zbc_injected = 1; 270219089Spjd return (error); 271219089Spjd } 272219089Spjd 273168404Spjd return (0); 274168404Spjd} 275