1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23249643Smm * Copyright (c) 2013 by Delphix. All rights reserved.
24262089Savg * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25168404Spjd */
26168404Spjd
27168404Spjd#include <sys/zfs_context.h>
28168404Spjd#include <sys/spa.h>
29168404Spjd#include <sys/zio.h>
30168404Spjd#include <sys/zio_checksum.h>
31219089Spjd#include <sys/zil.h>
32219089Spjd#include <zfs_fletcher.h>
33168404Spjd
34168404Spjd/*
35168404Spjd * Checksum vectors.
36168404Spjd *
37168404Spjd * In the SPA, everything is checksummed.  We support checksum vectors
38168404Spjd * for three distinct reasons:
39168404Spjd *
40168404Spjd *   1. Different kinds of data need different levels of protection.
41168404Spjd *	For SPA metadata, we always want a very strong checksum.
42168404Spjd *	For user data, we let users make the trade-off between speed
43168404Spjd *	and checksum strength.
44168404Spjd *
45168404Spjd *   2. Cryptographic hash and MAC algorithms are an area of active research.
46168404Spjd *	It is likely that in future hash functions will be at least as strong
47168404Spjd *	as current best-of-breed, and may be substantially faster as well.
48168404Spjd *	We want the ability to take advantage of these new hashes as soon as
49168404Spjd *	they become available.
50168404Spjd *
51168404Spjd *   3. If someone develops hardware that can compute a strong hash quickly,
52168404Spjd *	we want the ability to take advantage of that hardware.
53168404Spjd *
54168404Spjd * Of course, we don't want a checksum upgrade to invalidate existing
55219089Spjd * data, so we store the checksum *function* in eight bits of the bp.
56219089Spjd * This gives us room for up to 256 different checksum functions.
57168404Spjd *
58168404Spjd * When writing a block, we always checksum it with the latest-and-greatest
59168404Spjd * checksum function of the appropriate strength.  When reading a block,
60168404Spjd * we compare the expected checksum against the actual checksum, which we
61219089Spjd * compute via the checksum function specified by BP_GET_CHECKSUM(bp).
62168404Spjd */
63168404Spjd
64168404Spjd/*ARGSUSED*/
65168404Spjdstatic void
66168404Spjdzio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
67168404Spjd{
68168404Spjd	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
69168404Spjd}
70168404Spjd
/*
 * Table of checksum implementations.
 *
 * The table is indexed directly by checksum function number (see
 * zio_checksum_table[checksum] in zio_checksum_compute() and
 * zio_checksum_error()), so the order of the entries is significant and
 * must stay in step with enum zio_checksum.
 *
 * Each entry is initialized positionally.  As used in this file:
 *
 *   ci_func[2]	Checksum functions.  [0] is used when writing and when
 *		verifying data that does not need a byteswap; [1] is used
 *		when verifying byteswapped data.  A NULL ci_func[0] marks an
 *		entry that cannot be computed (zio_checksum_error() returns
 *		EINVAL for it).
 *   (1st flag)	NOTE(review): not referenced in this file; confirm its
 *		meaning against the zio_checksum_info_t declaration.
 *   ci_eck	Non-zero when the checksum is embedded in the data buffer
 *		itself (a zio_eck_t) rather than stored in the block pointer.
 *		Presumably the second flag column -- it is set exactly for
 *		the label, gang_header, zilog and zilog2 entries.
 *   ci_dedup	Consulted by zio_checksum_dedup_select().  Presumably the
 *		third flag column (set only for sha256) -- confirm.
 *   ci_name	Name reported through zio_bad_cksum_t by
 *		zio_checksum_error().
 */
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
	{{NULL,			NULL},			0, 0, 0, "inherit"},
	{{NULL,			NULL},			0, 0, 0, "on"},
	{{zio_checksum_off,	zio_checksum_off},	0, 0, 0, "off"},
	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1, 0, "label"},
	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1, 0, "gang_header"},
	{{fletcher_2_native,	fletcher_2_byteswap},	0, 1, 0, "zilog"},
	{{fletcher_2_native,	fletcher_2_byteswap},	0, 0, 0, "fletcher2"},
	{{fletcher_4_native,	fletcher_4_byteswap},	1, 0, 0, "fletcher4"},
	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 0, 1, "sha256"},
	{{fletcher_4_native,	fletcher_4_byteswap},	0, 1, 0, "zilog2"},
	{{zio_checksum_off,	zio_checksum_off},	0, 0, 0, "noparity"},
};
84168404Spjd
85219089Spjdenum zio_checksum
86219089Spjdzio_checksum_select(enum zio_checksum child, enum zio_checksum parent)
87168404Spjd{
88168404Spjd	ASSERT(child < ZIO_CHECKSUM_FUNCTIONS);
89168404Spjd	ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS);
90168404Spjd	ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON);
91168404Spjd
92168404Spjd	if (child == ZIO_CHECKSUM_INHERIT)
93168404Spjd		return (parent);
94168404Spjd
95168404Spjd	if (child == ZIO_CHECKSUM_ON)
96168404Spjd		return (ZIO_CHECKSUM_ON_VALUE);
97168404Spjd
98168404Spjd	return (child);
99168404Spjd}
100168404Spjd
101219089Spjdenum zio_checksum
102219089Spjdzio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
103219089Spjd    enum zio_checksum parent)
104219089Spjd{
105219089Spjd	ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS);
106219089Spjd	ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS);
107219089Spjd	ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON);
108219089Spjd
109219089Spjd	if (child == ZIO_CHECKSUM_INHERIT)
110219089Spjd		return (parent);
111219089Spjd
112219089Spjd	if (child == ZIO_CHECKSUM_ON)
113219089Spjd		return (spa_dedup_checksum(spa));
114219089Spjd
115219089Spjd	if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY))
116219089Spjd		return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY);
117219089Spjd
118219089Spjd	ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup ||
119219089Spjd	    (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF);
120219089Spjd
121219089Spjd	return (child);
122219089Spjd}
123219089Spjd
124168404Spjd/*
125185029Spjd * Set the external verifier for a gang block based on <vdev, offset, txg>,
126185029Spjd * a tuple which is guaranteed to be unique for the life of the pool.
127185029Spjd */
128185029Spjdstatic void
129185029Spjdzio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp)
130185029Spjd{
131185029Spjd	dva_t *dva = BP_IDENTITY(bp);
132219089Spjd	uint64_t txg = BP_PHYSICAL_BIRTH(bp);
133185029Spjd
134185029Spjd	ASSERT(BP_IS_GANG(bp));
135185029Spjd
136185029Spjd	ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0);
137185029Spjd}
138185029Spjd
139185029Spjd/*
140185029Spjd * Set the external verifier for a label block based on its offset.
141185029Spjd * The vdev is implicit, and the txg is unknowable at pool open time --
142185029Spjd * hence the logic in vdev_uberblock_load() to find the most recent copy.
143185029Spjd */
144185029Spjdstatic void
145185029Spjdzio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
146185029Spjd{
147185029Spjd	ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
148185029Spjd}
149185029Spjd
150185029Spjd/*
151168404Spjd * Generate the checksum.
152168404Spjd */
153168404Spjdvoid
154185029Spjdzio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
155185029Spjd	void *data, uint64_t size)
156168404Spjd{
157185029Spjd	blkptr_t *bp = zio->io_bp;
158185029Spjd	uint64_t offset = zio->io_offset;
159168404Spjd	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
160219089Spjd	zio_cksum_t cksum;
161168404Spjd
162185029Spjd	ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
163168404Spjd	ASSERT(ci->ci_func[0] != NULL);
164168404Spjd
165219089Spjd	if (ci->ci_eck) {
166219089Spjd		zio_eck_t *eck;
167219089Spjd
168219089Spjd		if (checksum == ZIO_CHECKSUM_ZILOG2) {
169219089Spjd			zil_chain_t *zilc = data;
170219089Spjd
171219089Spjd			size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ,
172219089Spjd			    uint64_t);
173219089Spjd			eck = &zilc->zc_eck;
174219089Spjd		} else {
175219089Spjd			eck = (zio_eck_t *)((char *)data + size) - 1;
176219089Spjd		}
177185029Spjd		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
178219089Spjd			zio_checksum_gang_verifier(&eck->zec_cksum, bp);
179185029Spjd		else if (checksum == ZIO_CHECKSUM_LABEL)
180219089Spjd			zio_checksum_label_verifier(&eck->zec_cksum, offset);
181185029Spjd		else
182219089Spjd			bp->blk_cksum = eck->zec_cksum;
183219089Spjd		eck->zec_magic = ZEC_MAGIC;
184219089Spjd		ci->ci_func[0](data, size, &cksum);
185219089Spjd		eck->zec_cksum = cksum;
186168404Spjd	} else {
187185029Spjd		ci->ci_func[0](data, size, &bp->blk_cksum);
188168404Spjd	}
189168404Spjd}
190168404Spjd
191168404Spjdint
192219089Spjdzio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
193168404Spjd{
194168404Spjd	blkptr_t *bp = zio->io_bp;
195185029Spjd	uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
196185029Spjd	    (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
197185029Spjd	int byteswap;
198219089Spjd	int error;
199185029Spjd	uint64_t size = (bp == NULL ? zio->io_size :
200185029Spjd	    (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
201185029Spjd	uint64_t offset = zio->io_offset;
202219089Spjd	void *data = zio->io_data;
203168404Spjd	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
204185029Spjd	zio_cksum_t actual_cksum, expected_cksum, verifier;
205168404Spjd
206168404Spjd	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
207249643Smm		return (SET_ERROR(EINVAL));
208168404Spjd
209219089Spjd	if (ci->ci_eck) {
210219089Spjd		zio_eck_t *eck;
211219089Spjd
212219089Spjd		if (checksum == ZIO_CHECKSUM_ZILOG2) {
213219089Spjd			zil_chain_t *zilc = data;
214219089Spjd			uint64_t nused;
215219089Spjd
216219089Spjd			eck = &zilc->zc_eck;
217219089Spjd			if (eck->zec_magic == ZEC_MAGIC)
218219089Spjd				nused = zilc->zc_nused;
219219089Spjd			else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC))
220219089Spjd				nused = BSWAP_64(zilc->zc_nused);
221219089Spjd			else
222249643Smm				return (SET_ERROR(ECKSUM));
223219089Spjd
224219089Spjd			if (nused > size)
225249643Smm				return (SET_ERROR(ECKSUM));
226219089Spjd
227219089Spjd			size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
228219089Spjd		} else {
229219089Spjd			eck = (zio_eck_t *)((char *)data + size) - 1;
230219089Spjd		}
231219089Spjd
232168404Spjd		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
233185029Spjd			zio_checksum_gang_verifier(&verifier, bp);
234185029Spjd		else if (checksum == ZIO_CHECKSUM_LABEL)
235185029Spjd			zio_checksum_label_verifier(&verifier, offset);
236185029Spjd		else
237185029Spjd			verifier = bp->blk_cksum;
238168404Spjd
239219089Spjd		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
240185029Spjd
241185029Spjd		if (byteswap)
242185029Spjd			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
243185029Spjd
244219089Spjd		expected_cksum = eck->zec_cksum;
245219089Spjd		eck->zec_cksum = verifier;
246185029Spjd		ci->ci_func[byteswap](data, size, &actual_cksum);
247219089Spjd		eck->zec_cksum = expected_cksum;
248185029Spjd
249185029Spjd		if (byteswap)
250168404Spjd			byteswap_uint64_array(&expected_cksum,
251168404Spjd			    sizeof (zio_cksum_t));
252168404Spjd	} else {
253168404Spjd		ASSERT(!BP_IS_GANG(bp));
254185029Spjd		byteswap = BP_SHOULD_BYTESWAP(bp);
255185029Spjd		expected_cksum = bp->blk_cksum;
256168404Spjd		ci->ci_func[byteswap](data, size, &actual_cksum);
257168404Spjd	}
258168404Spjd
259219089Spjd	info->zbc_expected = expected_cksum;
260219089Spjd	info->zbc_actual = actual_cksum;
261219089Spjd	info->zbc_checksum_name = ci->ci_name;
262219089Spjd	info->zbc_byteswapped = byteswap;
263219089Spjd	info->zbc_injected = 0;
264219089Spjd	info->zbc_has_cksum = 1;
265219089Spjd
266185029Spjd	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
267249643Smm		return (SET_ERROR(ECKSUM));
268168404Spjd
269219089Spjd	if (zio_injection_enabled && !zio->io_error &&
270219089Spjd	    (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) {
271168404Spjd
272219089Spjd		info->zbc_injected = 1;
273219089Spjd		return (error);
274219089Spjd	}
275219089Spjd
276168404Spjd	return (0);
277168404Spjd}
278