zio_checksum.c revision 219089
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23168404Spjd */
24168404Spjd
25168404Spjd#include <sys/zfs_context.h>
26168404Spjd#include <sys/spa.h>
27168404Spjd#include <sys/zio.h>
28168404Spjd#include <sys/zio_checksum.h>
29219089Spjd#include <sys/zil.h>
30219089Spjd#include <zfs_fletcher.h>
31168404Spjd
/*
 * Checksum vectors.
 *
 * In the SPA, everything is checksummed.  We support checksum vectors
 * for three distinct reasons:
 *
 *   1. Different kinds of data need different levels of protection.
 *	For SPA metadata, we always want a very strong checksum.
 *	For user data, we let users make the trade-off between speed
 *	and checksum strength.
 *
 *   2. Cryptographic hash and MAC algorithms are an area of active research.
 *	It is likely that future hash functions will be at least as strong
 *	as the current best-of-breed, and may be substantially faster as well.
 *	We want the ability to take advantage of these new hashes as soon as
 *	they become available.
 *
 *   3. If someone develops hardware that can compute a strong hash quickly,
 *	we want the ability to take advantage of that hardware.
 *
 * Of course, we don't want a checksum upgrade to invalidate existing
 * data, so we store the checksum *function* in eight bits of the bp.
 * This gives us room for up to 256 different checksum functions.
 *
 * When writing a block, we always checksum it with the latest-and-greatest
 * checksum function of the appropriate strength.  When reading a block,
 * we compare the expected checksum against the actual checksum, which we
 * compute via the checksum function specified by BP_GET_CHECKSUM(bp).
 */
61168404Spjd
62168404Spjd/*ARGSUSED*/
63168404Spjdstatic void
64168404Spjdzio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
65168404Spjd{
66168404Spjd	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
67168404Spjd}
68168404Spjd
69168404Spjdzio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
70219089Spjd	{{NULL,			NULL},			0, 0, 0, "inherit"},
71219089Spjd	{{NULL,			NULL},			0, 0, 0, "on"},
72219089Spjd	{{zio_checksum_off,	zio_checksum_off},	0, 0, 0, "off"},
73219089Spjd	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1, 0, "label"},
74219089Spjd	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1, 0, "gang_header"},
75219089Spjd	{{fletcher_2_native,	fletcher_2_byteswap},	0, 1, 0, "zilog"},
76219089Spjd	{{fletcher_2_native,	fletcher_2_byteswap},	0, 0, 0, "fletcher2"},
77219089Spjd	{{fletcher_4_native,	fletcher_4_byteswap},	1, 0, 0, "fletcher4"},
78219089Spjd	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 0, 1, "sha256"},
79219089Spjd	{{fletcher_4_native,	fletcher_4_byteswap},	0, 1, 0, "zilog2"},
80168404Spjd};
81168404Spjd
82219089Spjdenum zio_checksum
83219089Spjdzio_checksum_select(enum zio_checksum child, enum zio_checksum parent)
84168404Spjd{
85168404Spjd	ASSERT(child < ZIO_CHECKSUM_FUNCTIONS);
86168404Spjd	ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS);
87168404Spjd	ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON);
88168404Spjd
89168404Spjd	if (child == ZIO_CHECKSUM_INHERIT)
90168404Spjd		return (parent);
91168404Spjd
92168404Spjd	if (child == ZIO_CHECKSUM_ON)
93168404Spjd		return (ZIO_CHECKSUM_ON_VALUE);
94168404Spjd
95168404Spjd	return (child);
96168404Spjd}
97168404Spjd
98219089Spjdenum zio_checksum
99219089Spjdzio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
100219089Spjd    enum zio_checksum parent)
101219089Spjd{
102219089Spjd	ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS);
103219089Spjd	ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS);
104219089Spjd	ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON);
105219089Spjd
106219089Spjd	if (child == ZIO_CHECKSUM_INHERIT)
107219089Spjd		return (parent);
108219089Spjd
109219089Spjd	if (child == ZIO_CHECKSUM_ON)
110219089Spjd		return (spa_dedup_checksum(spa));
111219089Spjd
112219089Spjd	if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY))
113219089Spjd		return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY);
114219089Spjd
115219089Spjd	ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup ||
116219089Spjd	    (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF);
117219089Spjd
118219089Spjd	return (child);
119219089Spjd}
120219089Spjd
121168404Spjd/*
122185029Spjd * Set the external verifier for a gang block based on <vdev, offset, txg>,
123185029Spjd * a tuple which is guaranteed to be unique for the life of the pool.
124185029Spjd */
125185029Spjdstatic void
126185029Spjdzio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp)
127185029Spjd{
128185029Spjd	dva_t *dva = BP_IDENTITY(bp);
129219089Spjd	uint64_t txg = BP_PHYSICAL_BIRTH(bp);
130185029Spjd
131185029Spjd	ASSERT(BP_IS_GANG(bp));
132185029Spjd
133185029Spjd	ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0);
134185029Spjd}
135185029Spjd
136185029Spjd/*
137185029Spjd * Set the external verifier for a label block based on its offset.
138185029Spjd * The vdev is implicit, and the txg is unknowable at pool open time --
139185029Spjd * hence the logic in vdev_uberblock_load() to find the most recent copy.
140185029Spjd */
141185029Spjdstatic void
142185029Spjdzio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
143185029Spjd{
144185029Spjd	ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
145185029Spjd}
146185029Spjd
147185029Spjd/*
148168404Spjd * Generate the checksum.
149168404Spjd */
150168404Spjdvoid
151185029Spjdzio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
152185029Spjd	void *data, uint64_t size)
153168404Spjd{
154185029Spjd	blkptr_t *bp = zio->io_bp;
155185029Spjd	uint64_t offset = zio->io_offset;
156168404Spjd	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
157219089Spjd	zio_cksum_t cksum;
158168404Spjd
159185029Spjd	ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
160168404Spjd	ASSERT(ci->ci_func[0] != NULL);
161168404Spjd
162219089Spjd	if (ci->ci_eck) {
163219089Spjd		zio_eck_t *eck;
164219089Spjd
165219089Spjd		if (checksum == ZIO_CHECKSUM_ZILOG2) {
166219089Spjd			zil_chain_t *zilc = data;
167219089Spjd
168219089Spjd			size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ,
169219089Spjd			    uint64_t);
170219089Spjd			eck = &zilc->zc_eck;
171219089Spjd		} else {
172219089Spjd			eck = (zio_eck_t *)((char *)data + size) - 1;
173219089Spjd		}
174185029Spjd		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
175219089Spjd			zio_checksum_gang_verifier(&eck->zec_cksum, bp);
176185029Spjd		else if (checksum == ZIO_CHECKSUM_LABEL)
177219089Spjd			zio_checksum_label_verifier(&eck->zec_cksum, offset);
178185029Spjd		else
179219089Spjd			bp->blk_cksum = eck->zec_cksum;
180219089Spjd		eck->zec_magic = ZEC_MAGIC;
181219089Spjd		ci->ci_func[0](data, size, &cksum);
182219089Spjd		eck->zec_cksum = cksum;
183168404Spjd	} else {
184185029Spjd		ci->ci_func[0](data, size, &bp->blk_cksum);
185168404Spjd	}
186168404Spjd}
187168404Spjd
188168404Spjdint
189219089Spjdzio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
190168404Spjd{
191168404Spjd	blkptr_t *bp = zio->io_bp;
192185029Spjd	uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
193185029Spjd	    (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
194185029Spjd	int byteswap;
195219089Spjd	int error;
196185029Spjd	uint64_t size = (bp == NULL ? zio->io_size :
197185029Spjd	    (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
198185029Spjd	uint64_t offset = zio->io_offset;
199219089Spjd	void *data = zio->io_data;
200168404Spjd	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
201185029Spjd	zio_cksum_t actual_cksum, expected_cksum, verifier;
202168404Spjd
203168404Spjd	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
204168404Spjd		return (EINVAL);
205168404Spjd
206219089Spjd	if (ci->ci_eck) {
207219089Spjd		zio_eck_t *eck;
208219089Spjd
209219089Spjd		if (checksum == ZIO_CHECKSUM_ZILOG2) {
210219089Spjd			zil_chain_t *zilc = data;
211219089Spjd			uint64_t nused;
212219089Spjd
213219089Spjd			eck = &zilc->zc_eck;
214219089Spjd			if (eck->zec_magic == ZEC_MAGIC)
215219089Spjd				nused = zilc->zc_nused;
216219089Spjd			else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC))
217219089Spjd				nused = BSWAP_64(zilc->zc_nused);
218219089Spjd			else
219219089Spjd				return (ECKSUM);
220219089Spjd
221219089Spjd			if (nused > size)
222219089Spjd				return (ECKSUM);
223219089Spjd
224219089Spjd			size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
225219089Spjd		} else {
226219089Spjd			eck = (zio_eck_t *)((char *)data + size) - 1;
227219089Spjd		}
228219089Spjd
229168404Spjd		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
230185029Spjd			zio_checksum_gang_verifier(&verifier, bp);
231185029Spjd		else if (checksum == ZIO_CHECKSUM_LABEL)
232185029Spjd			zio_checksum_label_verifier(&verifier, offset);
233185029Spjd		else
234185029Spjd			verifier = bp->blk_cksum;
235168404Spjd
236219089Spjd		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
237185029Spjd
238185029Spjd		if (byteswap)
239185029Spjd			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
240185029Spjd
241219089Spjd		expected_cksum = eck->zec_cksum;
242219089Spjd		eck->zec_cksum = verifier;
243185029Spjd		ci->ci_func[byteswap](data, size, &actual_cksum);
244219089Spjd		eck->zec_cksum = expected_cksum;
245185029Spjd
246185029Spjd		if (byteswap)
247168404Spjd			byteswap_uint64_array(&expected_cksum,
248168404Spjd			    sizeof (zio_cksum_t));
249168404Spjd	} else {
250168404Spjd		ASSERT(!BP_IS_GANG(bp));
251185029Spjd		byteswap = BP_SHOULD_BYTESWAP(bp);
252185029Spjd		expected_cksum = bp->blk_cksum;
253168404Spjd		ci->ci_func[byteswap](data, size, &actual_cksum);
254168404Spjd	}
255168404Spjd
256219089Spjd	info->zbc_expected = expected_cksum;
257219089Spjd	info->zbc_actual = actual_cksum;
258219089Spjd	info->zbc_checksum_name = ci->ci_name;
259219089Spjd	info->zbc_byteswapped = byteswap;
260219089Spjd	info->zbc_injected = 0;
261219089Spjd	info->zbc_has_cksum = 1;
262219089Spjd
263185029Spjd	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
264168404Spjd		return (ECKSUM);
265168404Spjd
266219089Spjd	if (zio_injection_enabled && !zio->io_error &&
267219089Spjd	    (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) {
268168404Spjd
269219089Spjd		info->zbc_injected = 1;
270219089Spjd		return (error);
271219089Spjd	}
272219089Spjd
273168404Spjd	return (0);
274168404Spjd}
275