1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23 */
24
25#ifndef _VDEV_RAIDZ_MATH_IMPL_H
26#define	_VDEV_RAIDZ_MATH_IMPL_H
27
28#include <sys/types.h>
29#include <sys/vdev_raidz_impl.h>
30
/*
 * Inlining control for the routines in this file.
 * raidz_inline requests unconditional inlining; noinline forbids inlining and
 * is only defined here when nothing else has defined it already.
 */
#define	raidz_inline inline __attribute__((always_inline))
#if !defined(noinline)
#define	noinline __attribute__((noinline))
#endif
35
36/*
37 * Functions calculate multiplication constants for data reconstruction.
38 * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
39 * used parity columns for reconstruction.
40 * @rr			RAIDZ row
41 * @tgtidx		array of missing data indexes
42 * @coeff		output array of coefficients. Array must be provided by
43 *         		user and must hold minimum MUL_CNT values.
44 */
45static noinline void
46raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
47{
48	const unsigned ncols = rr->rr_cols;
49	const unsigned x = tgtidx[TARGET_X];
50
51	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
52}
53
54static noinline void
55raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
56{
57	const unsigned ncols = rr->rr_cols;
58	const unsigned x = tgtidx[TARGET_X];
59
60	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
61}
62
63static noinline void
64raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
65{
66	const unsigned ncols = rr->rr_cols;
67	const unsigned x = tgtidx[TARGET_X];
68	const unsigned y = tgtidx[TARGET_Y];
69	gf_t a, b, e;
70
71	a = gf_exp2(x + 255 - y);
72	b = gf_exp2(255 - (ncols - x - 1));
73	e = a ^ 0x01;
74
75	coeff[MUL_PQ_X] = gf_div(a, e);
76	coeff[MUL_PQ_Y] = gf_div(b, e);
77}
78
79static noinline void
80raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
81{
82	const unsigned ncols = rr->rr_cols;
83	const unsigned x = tgtidx[TARGET_X];
84	const unsigned y = tgtidx[TARGET_Y];
85
86	gf_t a, b, e;
87
88	a = gf_exp4(x + 255 - y);
89	b = gf_exp4(255 - (ncols - x - 1));
90	e = a ^ 0x01;
91
92	coeff[MUL_PR_X] = gf_div(a, e);
93	coeff[MUL_PR_Y] = gf_div(b, e);
94}
95
96static noinline void
97raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
98{
99	const unsigned ncols = rr->rr_cols;
100	const unsigned x = tgtidx[TARGET_X];
101	const unsigned y = tgtidx[TARGET_Y];
102
103	gf_t nx, ny, nxxy, nxyy, d;
104
105	nx = gf_exp2(ncols - x - 1);
106	ny = gf_exp2(ncols - y - 1);
107	nxxy = gf_mul(gf_mul(nx, nx), ny);
108	nxyy = gf_mul(gf_mul(nx, ny), ny);
109	d = nxxy ^ nxyy;
110
111	coeff[MUL_QR_XQ] = ny;
112	coeff[MUL_QR_X]	= gf_div(ny, d);
113	coeff[MUL_QR_YQ] = nx;
114	coeff[MUL_QR_Y]	= gf_div(nx, d);
115}
116
117static noinline void
118raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
119{
120	const unsigned ncols = rr->rr_cols;
121	const unsigned x = tgtidx[TARGET_X];
122	const unsigned y = tgtidx[TARGET_Y];
123	const unsigned z = tgtidx[TARGET_Z];
124
125	gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
126
127	nx = gf_exp2(ncols - x - 1);
128	ny = gf_exp2(ncols - y - 1);
129	nz = gf_exp2(ncols - z - 1);
130
131	nxx = gf_exp4(ncols - x - 1);
132	nyy = gf_exp4(ncols - y - 1);
133	nzz = gf_exp4(ncols - z - 1);
134
135	nyyz = gf_mul(gf_mul(ny, nz), ny);
136	nyzz = gf_mul(nzz, ny);
137
138	xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
139	    gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^  nyzz;
140
141	yd = gf_inv(ny ^ nz);
142
143	coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
144	coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
145	coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
146	coeff[MUL_PQR_YU] = nx;
147	coeff[MUL_PQR_YP] = gf_mul(nz, yd);
148	coeff[MUL_PQR_YQ] = yd;
149}
150
151/*
152 * Method for zeroing a buffer (can be implemented using SIMD).
153 * This method is used by multiple for gen/rec functions.
154 *
155 * @dc		Destination buffer
156 * @dsize	Destination buffer size
157 * @private	Unused
158 */
159static int
160raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
161{
162	v_t *dst = (v_t *)dc;
163	size_t i;
164
165	ZERO_DEFINE();
166
167	(void) private; /* unused */
168
169	ZERO(ZERO_D);
170
171	for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
172		STORE(dst + i, ZERO_D);
173		STORE(dst + i + ZERO_STRIDE, ZERO_D);
174	}
175
176	return (0);
177}
178
/*
 * Run raidz_zero_abd_cb() over @dabd, starting at offset 0 for @size bytes.
 *
 * Wrapped in do { } while (0) so that "raidz_zero(a, b);" is exactly one
 * statement and stays valid as the body of an unbraced if/else.
 */
#define	raidz_zero(dabd, size)						\
do {									\
	abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL);	\
} while (0)
183
184/*
185 * Method for copying two buffers (can be implemented using SIMD).
186 * This method is used by multiple for gen/rec functions.
187 *
188 * @dc		Destination buffer
189 * @sc		Source buffer
190 * @dsize	Destination buffer size
191 * @ssize	Source buffer size
192 * @private	Unused
193 */
194static int
195raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
196{
197	v_t *dst = (v_t *)dc;
198	const v_t *src = (v_t *)sc;
199	size_t i;
200
201	COPY_DEFINE();
202
203	(void) private; /* unused */
204
205	for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
206		LOAD(src + i, COPY_D);
207		STORE(dst + i, COPY_D);
208
209		LOAD(src + i + COPY_STRIDE, COPY_D);
210		STORE(dst + i + COPY_STRIDE, COPY_D);
211	}
212
213	return (0);
214}
215
216
/*
 * Run raidz_copy_abd_cb() over @dabd and @sabd, using the same offset @off
 * in both, for @size bytes.
 *
 * Wrapped in do { } while (0) so that "raidz_copy(...);" is exactly one
 * statement and stays valid as the body of an unbraced if/else.
 */
#define	raidz_copy(dabd, sabd, off, size)				\
do {									\
	abd_iterate_func2(dabd, sabd, off, off, size, raidz_copy_abd_cb, \
	    NULL);							\
} while (0)
222
223/*
224 * Method for adding (XORing) two buffers.
225 * Source and destination are XORed together and result is stored in
226 * destination buffer. This method is used by multiple for gen/rec functions.
227 *
228 * @dc		Destination buffer
229 * @sc		Source buffer
230 * @dsize	Destination buffer size
231 * @ssize	Source buffer size
232 * @private	Unused
233 */
234static int
235raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
236{
237	v_t *dst = (v_t *)dc;
238	const v_t *src = (v_t *)sc;
239	size_t i;
240
241	ADD_DEFINE();
242
243	(void) private; /* unused */
244
245	for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
246		LOAD(dst + i, ADD_D);
247		XOR_ACC(src + i, ADD_D);
248		STORE(dst + i, ADD_D);
249
250		LOAD(dst + i + ADD_STRIDE, ADD_D);
251		XOR_ACC(src + i + ADD_STRIDE, ADD_D);
252		STORE(dst + i + ADD_STRIDE, ADD_D);
253	}
254
255	return (0);
256}
257
/*
 * Run raidz_add_abd_cb() over @dabd and @sabd, using the same offset @off
 * in both, for @size bytes.
 *
 * Wrapped in do { } while (0) so that "raidz_add(...);" is exactly one
 * statement and stays valid as the body of an unbraced if/else.
 */
#define	raidz_add(dabd, sabd, off, size)				\
do {									\
	abd_iterate_func2(dabd, sabd, off, off, size, raidz_add_abd_cb, \
	    NULL);							\
} while (0)
263
264/*
265 * Method for multiplying a buffer with a constant in GF(2^8).
266 * Symbols from buffer are multiplied by a constant and result is stored
267 * back in the same buffer.
268 *
269 * @dc		In/Out data buffer.
270 * @size	Size of the buffer
271 * @private	pointer to the multiplication constant (unsigned)
272 */
273static int
274raidz_mul_abd_cb(void *dc, size_t size, void *private)
275{
276	const unsigned mul = *((unsigned *)private);
277	v_t *d = (v_t *)dc;
278	size_t i;
279
280	MUL_DEFINE();
281
282	for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
283		LOAD(d + i, MUL_D);
284		MUL(mul, MUL_D);
285		STORE(d + i, MUL_D);
286
287		LOAD(d + i + MUL_STRIDE, MUL_D);
288		MUL(mul, MUL_D);
289		STORE(d + i + MUL_STRIDE, MUL_D);
290	}
291
292	return (0);
293}
294
295
/*
 * Syndrome generation/update macros
 *
 * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
 *
 * D names the register(s) holding data, T names scratch register(s), and t
 * is the address of the parity/syndrome element that is loaded, updated and
 * stored back.
 *
 * Every macro is wrapped in do { } while (0) so that an invocation followed
 * by ';' is exactly one statement and stays valid in an unbraced if/else.
 */

/* load t, XOR with D, store back */
#define	P_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* load t, MUL2, XOR with D, store back */
#define	Q_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* load t, MUL2, store back (no data contribution) */
#define	Q_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	STORE((t), T);			\
} while (0)

/* load t, MUL4, XOR with D, store back */
#define	R_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* load t, MUL4, store back (no data contribution) */
#define	R_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	STORE((t), T);			\
} while (0)
337
338
/*
 * PARITY CALCULATION
 *
 * The *_SYNDROME macros are used for parity/syndrome calculation.
 * *_D_SYNDROME() macros are used to calculate the syndrome between 0 and
 * the length of the data column, and *_SYNDROME() macros only update
 * the parity/syndrome where the data column is shorter.
 *
 * P parity is calculated using raidz_add_abd_cb().
 *
 * For CPU L2 cache blocking we process 64KB at a time.
 */
#define	CHUNK		(64 * 1024)
352
353/*
354 * Generate P parity (RAIDZ1)
355 *
356 * @rr	RAIDZ row
357 */
358static raidz_inline void
359raidz_generate_p_impl(raidz_row_t * const rr)
360{
361	size_t c;
362	const size_t ncols = rr->rr_cols;
363	const size_t psize = rr->rr_col[CODE_P].rc_size;
364	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
365	size_t off, size;
366
367	raidz_math_begin();
368
369	for (off = 0; off < psize; off += CHUNK) {
370
371		/* start with first data column */
372		size = MIN(CHUNK, psize - off);
373		raidz_copy(pabd, rr->rr_col[1].rc_abd, off, size);
374
375		for (c = 2; c < ncols; c++) {
376			size = rr->rr_col[c].rc_size;
377			if (size <= off)
378				continue;
379
380			/* add data column */
381			size = MIN(CHUNK, size - off);
382			abd_t *dabd = rr->rr_col[c].rc_abd;
383			raidz_add(pabd, dabd, off, size);
384		}
385	}
386
387	raidz_math_end();
388}
389
390
391/*
392 * Generate PQ parity (RAIDZ2)
393 * The function is called per data column.
394 *
395 * @c		array of pointers to parity (code) columns
396 * @dc		pointer to data column
397 * @csize	size of parity columns
398 * @dsize	size of data column
399 */
400static void
401raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
402    const size_t dsize)
403{
404	v_t *p = (v_t *)c[0];
405	v_t *q = (v_t *)c[1];
406	const v_t *d = (const v_t *)dc;
407	const v_t * const dend = d + (dsize / sizeof (v_t));
408	const v_t * const qend = q + (csize / sizeof (v_t));
409
410	GEN_PQ_DEFINE();
411
412	MUL2_SETUP();
413
414	for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
415	    q += GEN_PQ_STRIDE) {
416		LOAD(d, GEN_PQ_D);
417		P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
418		Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
419	}
420	for (; q < qend; q += GEN_PQ_STRIDE) {
421		Q_SYNDROME(GEN_PQ_C, q);
422	}
423}
424
425
426/*
427 * Generate PQ parity (RAIDZ2)
428 *
429 * @rr	RAIDZ row
430 */
431static raidz_inline void
432raidz_generate_pq_impl(raidz_row_t * const rr)
433{
434	size_t c;
435	const size_t ncols = rr->rr_cols;
436	const size_t csize = rr->rr_col[CODE_P].rc_size;
437	size_t off, size, dsize;
438	abd_t *dabd;
439	abd_t *cabds[] = {
440		rr->rr_col[CODE_P].rc_abd,
441		rr->rr_col[CODE_Q].rc_abd
442	};
443
444	raidz_math_begin();
445
446	for (off = 0; off < csize; off += CHUNK) {
447
448		size = MIN(CHUNK, csize - off);
449		raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, off, size);
450		raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, off, size);
451
452		for (c = 3; c < ncols; c++) {
453			dabd = rr->rr_col[c].rc_abd;
454			dsize = rr->rr_col[c].rc_size;
455			dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
456
457			abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 2,
458			    raidz_gen_pq_add);
459		}
460	}
461
462	raidz_math_end();
463}
464
465
466/*
467 * Generate PQR parity (RAIDZ3)
468 * The function is called per data column.
469 *
470 * @c		array of pointers to parity (code) columns
471 * @dc		pointer to data column
472 * @csize	size of parity columns
473 * @dsize	size of data column
474 */
475static void
476raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
477    const size_t dsize)
478{
479	v_t *p = (v_t *)c[CODE_P];
480	v_t *q = (v_t *)c[CODE_Q];
481	v_t *r = (v_t *)c[CODE_R];
482	const v_t *d = (const v_t *)dc;
483	const v_t * const dend = d + (dsize / sizeof (v_t));
484	const v_t * const qend = q + (csize / sizeof (v_t));
485
486	GEN_PQR_DEFINE();
487
488	MUL2_SETUP();
489
490	for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
491	    q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
492		LOAD(d, GEN_PQR_D);
493		P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
494		Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
495		R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
496	}
497	for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
498		Q_SYNDROME(GEN_PQR_C, q);
499		R_SYNDROME(GEN_PQR_C, r);
500	}
501}
502
503
504/*
505 * Generate PQR parity (RAIDZ3)
506 *
507 * @rr	RAIDZ row
508 */
509static raidz_inline void
510raidz_generate_pqr_impl(raidz_row_t * const rr)
511{
512	size_t c;
513	const size_t ncols = rr->rr_cols;
514	const size_t csize = rr->rr_col[CODE_P].rc_size;
515	size_t off, size, dsize;
516	abd_t *dabd;
517	abd_t *cabds[] = {
518		rr->rr_col[CODE_P].rc_abd,
519		rr->rr_col[CODE_Q].rc_abd,
520		rr->rr_col[CODE_R].rc_abd
521	};
522
523	raidz_math_begin();
524
525	for (off = 0; off < csize; off += CHUNK) {
526
527		size = MIN(CHUNK, csize - off);
528		raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, off, size);
529		raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, off, size);
530		raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, off, size);
531
532		for (c = 4; c < ncols; c++) {
533			dabd = rr->rr_col[c].rc_abd;
534			dsize = rr->rr_col[c].rc_size;
535			dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
536
537			abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 3,
538			    raidz_gen_pqr_add);
539		}
540	}
541
542	raidz_math_end();
543}
544
545
546/*
547 * DATA RECONSTRUCTION
548 *
549 * Data reconstruction process consists of two phases:
550 * 	- Syndrome calculation
551 * 	- Data reconstruction
552 *
553 * Syndrome is calculated by generating parity using available data columns
554 * and zeros in places of erasure. Existing parity is added to corresponding
555 * syndrome value to obtain the [P|Q|R]syn values from equation:
556 * 	P = Psyn + Dx + Dy + Dz
557 * 	Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
558 * 	R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
559 *
 * For the data reconstruction phase, the corresponding equations are solved
 * for the missing data (Dx, Dy, Dz). This generally involves multiplying known
 * symbols by a coefficient and adding them together. The multiplication
 * constant coefficients are calculated ahead of the operation in the
 * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
565 *
 * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
 * and "short" columns.
 * For this reason, reconstruction is performed in a minimum of
 * two steps. First, from offset 0 to short_size, then from short_size to
 * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
 * over both ranges. The split also enables removal of conditional expressions
 * from loop bodies, improving throughput of SIMD implementations.
 * For the best performance, all functions marked with raidz_inline attribute
 * must be inlined by the compiler.
575 *
576 *    parity          data
577 *    columns         columns
578 * <----------> <------------------>
579 *                   x       y  <----+ missing columns (x, y)
580 *                   |       |
581 * +---+---+---+---+-v-+---+-v-+---+   ^ 0
582 * |   |   |   |   |   |   |   |   |   |
583 * |   |   |   |   |   |   |   |   |   |
584 * | P | Q | R | D | D | D | D | D |   |
585 * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
586 * |   |   |   |   |   |   |   |   |   v
587 * |   |   |   |   |   +---+---+---+   ^ short_size
588 * |   |   |   |   |   |               |
589 * +---+---+---+---+---+               v big_size
590 * <------------------> <---------->
591 *      big columns     short columns
592 *
593 */
594
595
596
597
598/*
599 * Reconstruct single data column using P parity
600 *
601 * @syn_method	raidz_add_abd()
602 * @rec_method	not applicable
603 *
604 * @rr		RAIDZ row
605 * @tgtidx	array of missing data indexes
606 */
607static raidz_inline int
608raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
609{
610	size_t c;
611	const size_t firstdc = rr->rr_firstdatacol;
612	const size_t ncols = rr->rr_cols;
613	const size_t x = tgtidx[TARGET_X];
614	const size_t xsize = rr->rr_col[x].rc_size;
615	abd_t *xabd = rr->rr_col[x].rc_abd;
616	size_t off, size;
617
618	if (xabd == NULL)
619		return (1 << CODE_P);
620
621	raidz_math_begin();
622
623	for (off = 0; off < xsize; off += CHUNK) {
624
625		/* copy P into target */
626		size = MIN(CHUNK, xsize - off);
627		raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, off, size);
628
629		/* generate p_syndrome */
630		for (c = firstdc; c < ncols; c++) {
631			if (c == x)
632				continue;
633			size = rr->rr_col[c].rc_size;
634			if (size <= off)
635				continue;
636
637			size = MIN(CHUNK, MIN(size, xsize) - off);
638			abd_t *dabd = rr->rr_col[c].rc_abd;
639			raidz_add(xabd, dabd, off, size);
640		}
641	}
642
643	raidz_math_end();
644
645	return (1 << CODE_P);
646}
647
648
649/*
650 * Generate Q syndrome (Qsyn)
651 *
652 * @xc		array of pointers to syndrome columns
653 * @dc		data column (NULL if missing)
654 * @xsize	size of syndrome columns
655 * @dsize	size of data column (0 if missing)
656 */
657static void
658raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
659    const size_t dsize)
660{
661	v_t *x = (v_t *)xc[TARGET_X];
662	const v_t *d = (const v_t *)dc;
663	const v_t * const dend = d + (dsize / sizeof (v_t));
664	const v_t * const xend = x + (xsize / sizeof (v_t));
665
666	SYN_Q_DEFINE();
667
668	MUL2_SETUP();
669
670	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
671		LOAD(d, SYN_Q_D);
672		Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
673	}
674	for (; x < xend; x += SYN_STRIDE) {
675		Q_SYNDROME(SYN_Q_X, x);
676	}
677}
678
679
680/*
681 * Reconstruct single data column using Q parity
682 *
683 * @syn_method	raidz_add_abd()
684 * @rec_method	raidz_mul_abd_cb()
685 *
686 * @rr		RAIDZ row
687 * @tgtidx	array of missing data indexes
688 */
689static raidz_inline int
690raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
691{
692	size_t c;
693	size_t dsize;
694	abd_t *dabd;
695	const size_t firstdc = rr->rr_firstdatacol;
696	const size_t ncols = rr->rr_cols;
697	const size_t x = tgtidx[TARGET_X];
698	abd_t *xabd = rr->rr_col[x].rc_abd;
699	const size_t xsize = rr->rr_col[x].rc_size;
700	abd_t *tabds[] = { xabd };
701
702	if (xabd == NULL)
703		return (1 << CODE_Q);
704
705	unsigned coeff[MUL_CNT];
706	raidz_rec_q_coeff(rr, tgtidx, coeff);
707
708	raidz_math_begin();
709
710	/* Start with first data column if present */
711	if (firstdc != x) {
712		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
713	} else {
714		raidz_zero(xabd, xsize);
715	}
716
717	/* generate q_syndrome */
718	for (c = firstdc+1; c < ncols; c++) {
719		if (c == x) {
720			dabd = NULL;
721			dsize = 0;
722		} else {
723			dabd = rr->rr_col[c].rc_abd;
724			dsize = rr->rr_col[c].rc_size;
725		}
726
727		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
728		    raidz_syn_q_abd);
729	}
730
731	/* add Q to the syndrome */
732	raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, 0, xsize);
733
734	/* transform the syndrome */
735	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
736
737	raidz_math_end();
738
739	return (1 << CODE_Q);
740}
741
742
743/*
744 * Generate R syndrome (Rsyn)
745 *
746 * @xc		array of pointers to syndrome columns
747 * @dc		data column (NULL if missing)
748 * @tsize	size of syndrome columns
749 * @dsize	size of data column (0 if missing)
750 */
751static void
752raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
753    const size_t dsize)
754{
755	v_t *x = (v_t *)xc[TARGET_X];
756	const v_t *d = (const v_t *)dc;
757	const v_t * const dend = d + (dsize / sizeof (v_t));
758	const v_t * const xend = x + (tsize / sizeof (v_t));
759
760	SYN_R_DEFINE();
761
762	MUL2_SETUP();
763
764	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
765		LOAD(d, SYN_R_D);
766		R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
767	}
768	for (; x < xend; x += SYN_STRIDE) {
769		R_SYNDROME(SYN_R_X, x);
770	}
771}
772
773
774/*
775 * Reconstruct single data column using R parity
776 *
777 * @syn_method	raidz_add_abd()
778 * @rec_method	raidz_mul_abd_cb()
779 *
780 * @rr		RAIDZ rr
781 * @tgtidx	array of missing data indexes
782 */
783static raidz_inline int
784raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
785{
786	size_t c;
787	size_t dsize;
788	abd_t *dabd;
789	const size_t firstdc = rr->rr_firstdatacol;
790	const size_t ncols = rr->rr_cols;
791	const size_t x = tgtidx[TARGET_X];
792	const size_t xsize = rr->rr_col[x].rc_size;
793	abd_t *xabd = rr->rr_col[x].rc_abd;
794	abd_t *tabds[] = { xabd };
795
796	if (xabd == NULL)
797		return (1 << CODE_R);
798
799	unsigned coeff[MUL_CNT];
800	raidz_rec_r_coeff(rr, tgtidx, coeff);
801
802	raidz_math_begin();
803
804	/* Start with first data column if present */
805	if (firstdc != x) {
806		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
807	} else {
808		raidz_zero(xabd, xsize);
809	}
810
811
812	/* generate q_syndrome */
813	for (c = firstdc+1; c < ncols; c++) {
814		if (c == x) {
815			dabd = NULL;
816			dsize = 0;
817		} else {
818			dabd = rr->rr_col[c].rc_abd;
819			dsize = rr->rr_col[c].rc_size;
820		}
821
822		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
823		    raidz_syn_r_abd);
824	}
825
826	/* add R to the syndrome */
827	raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, 0, xsize);
828
829	/* transform the syndrome */
830	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
831
832	raidz_math_end();
833
834	return (1 << CODE_R);
835}
836
837
838/*
839 * Generate P and Q syndromes
840 *
841 * @xc		array of pointers to syndrome columns
842 * @dc		data column (NULL if missing)
843 * @tsize	size of syndrome columns
844 * @dsize	size of data column (0 if missing)
845 */
846static void
847raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
848    const size_t dsize)
849{
850	v_t *x = (v_t *)tc[TARGET_X];
851	v_t *y = (v_t *)tc[TARGET_Y];
852	const v_t *d = (const v_t *)dc;
853	const v_t * const dend = d + (dsize / sizeof (v_t));
854	const v_t * const yend = y + (tsize / sizeof (v_t));
855
856	SYN_PQ_DEFINE();
857
858	MUL2_SETUP();
859
860	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
861		LOAD(d, SYN_PQ_D);
862		P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
863		Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
864	}
865	for (; y < yend; y += SYN_STRIDE) {
866		Q_SYNDROME(SYN_PQ_X, y);
867	}
868}
869
870/*
871 * Reconstruct data using PQ parity and PQ syndromes
872 *
873 * @tc		syndrome/result columns
874 * @tsize	size of syndrome/result columns
875 * @c		parity columns
876 * @mul		array of multiplication constants
877 */
878static void
879raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
880    const unsigned *mul)
881{
882	v_t *x = (v_t *)tc[TARGET_X];
883	v_t *y = (v_t *)tc[TARGET_Y];
884	const v_t * const xend = x + (tsize / sizeof (v_t));
885	const v_t *p = (v_t *)c[CODE_P];
886	const v_t *q = (v_t *)c[CODE_Q];
887
888	REC_PQ_DEFINE();
889
890	for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
891	    p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
892		LOAD(x, REC_PQ_X);
893		LOAD(y, REC_PQ_Y);
894
895		XOR_ACC(p, REC_PQ_X);
896		XOR_ACC(q, REC_PQ_Y);
897
898		/* Save Pxy */
899		COPY(REC_PQ_X,  REC_PQ_T);
900
901		/* Calc X */
902		MUL(mul[MUL_PQ_X], REC_PQ_X);
903		MUL(mul[MUL_PQ_Y], REC_PQ_Y);
904		XOR(REC_PQ_Y,  REC_PQ_X);
905		STORE(x, REC_PQ_X);
906
907		/* Calc Y */
908		XOR(REC_PQ_T,  REC_PQ_X);
909		STORE(y, REC_PQ_X);
910	}
911}
912
913
914/*
915 * Reconstruct two data columns using PQ parity
916 *
917 * @syn_method	raidz_syn_pq_abd()
918 * @rec_method	raidz_rec_pq_abd()
919 *
920 * @rr		RAIDZ row
921 * @tgtidx	array of missing data indexes
922 */
923static raidz_inline int
924raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
925{
926	size_t c;
927	size_t dsize;
928	abd_t *dabd;
929	const size_t firstdc = rr->rr_firstdatacol;
930	const size_t ncols = rr->rr_cols;
931	const size_t x = tgtidx[TARGET_X];
932	const size_t y = tgtidx[TARGET_Y];
933	const size_t xsize = rr->rr_col[x].rc_size;
934	const size_t ysize = rr->rr_col[y].rc_size;
935	abd_t *xabd = rr->rr_col[x].rc_abd;
936	abd_t *yabd = rr->rr_col[y].rc_abd;
937	abd_t *tabds[2] = { xabd, yabd };
938	abd_t *cabds[] = {
939		rr->rr_col[CODE_P].rc_abd,
940		rr->rr_col[CODE_Q].rc_abd
941	};
942
943	if (xabd == NULL)
944		return ((1 << CODE_P) | (1 << CODE_Q));
945
946	unsigned coeff[MUL_CNT];
947	raidz_rec_pq_coeff(rr, tgtidx, coeff);
948
949	/*
950	 * Check if some of targets is shorter then others
951	 * In this case, shorter target needs to be replaced with
952	 * new buffer so that syndrome can be calculated.
953	 */
954	if (ysize < xsize) {
955		yabd = abd_alloc(xsize, B_FALSE);
956		tabds[1] = yabd;
957	}
958
959	raidz_math_begin();
960
961	/* Start with first data column if present */
962	if (firstdc != x) {
963		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
964		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
965	} else {
966		raidz_zero(xabd, xsize);
967		raidz_zero(yabd, xsize);
968	}
969
970	/* generate q_syndrome */
971	for (c = firstdc+1; c < ncols; c++) {
972		if (c == x || c == y) {
973			dabd = NULL;
974			dsize = 0;
975		} else {
976			dabd = rr->rr_col[c].rc_abd;
977			dsize = rr->rr_col[c].rc_size;
978		}
979
980		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
981		    raidz_syn_pq_abd);
982	}
983
984	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
985
986	/* Copy shorter targets back to the original abd buffer */
987	if (ysize < xsize)
988		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
989
990	raidz_math_end();
991
992	if (ysize < xsize)
993		abd_free(yabd);
994
995	return ((1 << CODE_P) | (1 << CODE_Q));
996}
997
998
999/*
1000 * Generate P and R syndromes
1001 *
1002 * @xc		array of pointers to syndrome columns
1003 * @dc		data column (NULL if missing)
1004 * @tsize	size of syndrome columns
1005 * @dsize	size of data column (0 if missing)
1006 */
1007static void
1008raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
1009    const size_t dsize)
1010{
1011	v_t *x = (v_t *)c[TARGET_X];
1012	v_t *y = (v_t *)c[TARGET_Y];
1013	const v_t *d = (const v_t *)dc;
1014	const v_t * const dend = d + (dsize / sizeof (v_t));
1015	const v_t * const yend = y + (tsize / sizeof (v_t));
1016
1017	SYN_PR_DEFINE();
1018
1019	MUL2_SETUP();
1020
1021	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1022		LOAD(d, SYN_PR_D);
1023		P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
1024		R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
1025	}
1026	for (; y < yend; y += SYN_STRIDE) {
1027		R_SYNDROME(SYN_PR_X, y);
1028	}
1029}
1030
1031/*
1032 * Reconstruct data using PR parity and PR syndromes
1033 *
1034 * @tc		syndrome/result columns
1035 * @tsize	size of syndrome/result columns
1036 * @c		parity columns
1037 * @mul		array of multiplication constants
1038 */
1039static void
1040raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
1041    const unsigned *mul)
1042{
1043	v_t *x = (v_t *)t[TARGET_X];
1044	v_t *y = (v_t *)t[TARGET_Y];
1045	const v_t * const xend = x + (tsize / sizeof (v_t));
1046	const v_t *p = (v_t *)c[CODE_P];
1047	const v_t *q = (v_t *)c[CODE_Q];
1048
1049	REC_PR_DEFINE();
1050
1051	for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
1052	    p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
1053		LOAD(x, REC_PR_X);
1054		LOAD(y, REC_PR_Y);
1055		XOR_ACC(p, REC_PR_X);
1056		XOR_ACC(q, REC_PR_Y);
1057
1058		/* Save Pxy */
1059		COPY(REC_PR_X,  REC_PR_T);
1060
1061		/* Calc X */
1062		MUL(mul[MUL_PR_X], REC_PR_X);
1063		MUL(mul[MUL_PR_Y], REC_PR_Y);
1064		XOR(REC_PR_Y,  REC_PR_X);
1065		STORE(x, REC_PR_X);
1066
1067		/* Calc Y */
1068		XOR(REC_PR_T,  REC_PR_X);
1069		STORE(y, REC_PR_X);
1070	}
1071}
1072
1073
1074/*
1075 * Reconstruct two data columns using PR parity
1076 *
1077 * @syn_method	raidz_syn_pr_abd()
1078 * @rec_method	raidz_rec_pr_abd()
1079 *
1080 * @rr		RAIDZ row
1081 * @tgtidx	array of missing data indexes
1082 */
1083static raidz_inline int
1084raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1085{
1086	size_t c;
1087	size_t dsize;
1088	abd_t *dabd;
1089	const size_t firstdc = rr->rr_firstdatacol;
1090	const size_t ncols = rr->rr_cols;
1091	const size_t x = tgtidx[0];
1092	const size_t y = tgtidx[1];
1093	const size_t xsize = rr->rr_col[x].rc_size;
1094	const size_t ysize = rr->rr_col[y].rc_size;
1095	abd_t *xabd = rr->rr_col[x].rc_abd;
1096	abd_t *yabd = rr->rr_col[y].rc_abd;
1097	abd_t *tabds[2] = { xabd, yabd };
1098	abd_t *cabds[] = {
1099		rr->rr_col[CODE_P].rc_abd,
1100		rr->rr_col[CODE_R].rc_abd
1101	};
1102
1103	if (xabd == NULL)
1104		return ((1 << CODE_P) | (1 << CODE_R));
1105
1106	unsigned coeff[MUL_CNT];
1107	raidz_rec_pr_coeff(rr, tgtidx, coeff);
1108
1109	/*
1110	 * Check if some of targets are shorter then others.
1111	 * They need to be replaced with a new buffer so that syndrome can
1112	 * be calculated on full length.
1113	 */
1114	if (ysize < xsize) {
1115		yabd = abd_alloc(xsize, B_FALSE);
1116		tabds[1] = yabd;
1117	}
1118
1119	raidz_math_begin();
1120
1121	/* Start with first data column if present */
1122	if (firstdc != x) {
1123		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1124		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1125	} else {
1126		raidz_zero(xabd, xsize);
1127		raidz_zero(yabd, xsize);
1128	}
1129
1130	/* generate q_syndrome */
1131	for (c = firstdc+1; c < ncols; c++) {
1132		if (c == x || c == y) {
1133			dabd = NULL;
1134			dsize = 0;
1135		} else {
1136			dabd = rr->rr_col[c].rc_abd;
1137			dsize = rr->rr_col[c].rc_size;
1138		}
1139
1140		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
1141		    raidz_syn_pr_abd);
1142	}
1143
1144	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
1145
1146	/*
1147	 * Copy shorter targets back to the original abd buffer
1148	 */
1149	if (ysize < xsize)
1150		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1151
1152	raidz_math_end();
1153
1154	if (ysize < xsize)
1155		abd_free(yabd);
1156
1157	return ((1 << CODE_P) | (1 << CODE_R));
1158}
1159
1160
1161/*
1162 * Generate Q and R syndromes
1163 *
1164 * @xc		array of pointers to syndrome columns
1165 * @dc		data column (NULL if missing)
1166 * @tsize	size of syndrome columns
1167 * @dsize	size of data column (0 if missing)
1168 */
1169static void
1170raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
1171    const size_t dsize)
1172{
1173	v_t *x = (v_t *)c[TARGET_X];
1174	v_t *y = (v_t *)c[TARGET_Y];
1175	const v_t * const xend = x + (tsize / sizeof (v_t));
1176	const v_t *d = (const v_t *)dc;
1177	const v_t * const dend = d + (dsize / sizeof (v_t));
1178
1179	SYN_QR_DEFINE();
1180
1181	MUL2_SETUP();
1182
1183	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1184		LOAD(d, SYN_PQ_D);
1185		Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
1186		R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
1187	}
1188	for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
1189		Q_SYNDROME(SYN_QR_X, x);
1190		R_SYNDROME(SYN_QR_X, y);
1191	}
1192}
1193
1194
1195/*
1196 * Reconstruct data using QR parity and QR syndromes
1197 *
1198 * @tc		syndrome/result columns
1199 * @tsize	size of syndrome/result columns
1200 * @c		parity columns
1201 * @mul		array of multiplication constants
1202 */
1203static void
1204raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
1205    const unsigned *mul)
1206{
1207	v_t *x = (v_t *)t[TARGET_X];
1208	v_t *y = (v_t *)t[TARGET_Y];
1209	const v_t * const xend = x + (tsize / sizeof (v_t));
1210	const v_t *p = (v_t *)c[CODE_P];
1211	const v_t *q = (v_t *)c[CODE_Q];
1212
1213	REC_QR_DEFINE();
1214
1215	for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
1216	    p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
1217		LOAD(x, REC_QR_X);
1218		LOAD(y, REC_QR_Y);
1219
1220		XOR_ACC(p, REC_QR_X);
1221		XOR_ACC(q, REC_QR_Y);
1222
1223		/* Save Pxy */
1224		COPY(REC_QR_X,  REC_QR_T);
1225
1226		/* Calc X */
1227		MUL(mul[MUL_QR_XQ], REC_QR_X);	/* X = Q * xqm */
1228		XOR(REC_QR_Y, REC_QR_X);	/* X = R ^ X   */
1229		MUL(mul[MUL_QR_X], REC_QR_X);	/* X = X * xm  */
1230		STORE(x, REC_QR_X);
1231
1232		/* Calc Y */
1233		MUL(mul[MUL_QR_YQ], REC_QR_T);	/* X = Q * xqm */
1234		XOR(REC_QR_Y, REC_QR_T);	/* X = R ^ X   */
1235		MUL(mul[MUL_QR_Y], REC_QR_T);	/* X = X * xm  */
1236		STORE(y, REC_QR_T);
1237	}
1238}
1239
1240
1241/*
1242 * Reconstruct two data columns using QR parity
1243 *
1244 * @syn_method	raidz_syn_qr_abd()
1245 * @rec_method	raidz_rec_qr_abd()
1246 *
1247 * @rr		RAIDZ row
1248 * @tgtidx	array of missing data indexes
1249 */
1250static raidz_inline int
1251raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1252{
1253	size_t c;
1254	size_t dsize;
1255	abd_t *dabd;
1256	const size_t firstdc = rr->rr_firstdatacol;
1257	const size_t ncols = rr->rr_cols;
1258	const size_t x = tgtidx[TARGET_X];
1259	const size_t y = tgtidx[TARGET_Y];
1260	const size_t xsize = rr->rr_col[x].rc_size;
1261	const size_t ysize = rr->rr_col[y].rc_size;
1262	abd_t *xabd = rr->rr_col[x].rc_abd;
1263	abd_t *yabd = rr->rr_col[y].rc_abd;
1264	abd_t *tabds[2] = { xabd, yabd };
1265	abd_t *cabds[] = {
1266		rr->rr_col[CODE_Q].rc_abd,
1267		rr->rr_col[CODE_R].rc_abd
1268	};
1269
1270	if (xabd == NULL)
1271		return ((1 << CODE_Q) | (1 << CODE_R));
1272
1273	unsigned coeff[MUL_CNT];
1274	raidz_rec_qr_coeff(rr, tgtidx, coeff);
1275
1276	/*
1277	 * Check if some of targets is shorter then others
1278	 * In this case, shorter target needs to be replaced with
1279	 * new buffer so that syndrome can be calculated.
1280	 */
1281	if (ysize < xsize) {
1282		yabd = abd_alloc(xsize, B_FALSE);
1283		tabds[1] = yabd;
1284	}
1285
1286	raidz_math_begin();
1287
1288	/* Start with first data column if present */
1289	if (firstdc != x) {
1290		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1291		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1292	} else {
1293		raidz_zero(xabd, xsize);
1294		raidz_zero(yabd, xsize);
1295	}
1296
1297	/* generate q_syndrome */
1298	for (c = firstdc+1; c < ncols; c++) {
1299		if (c == x || c == y) {
1300			dabd = NULL;
1301			dsize = 0;
1302		} else {
1303			dabd = rr->rr_col[c].rc_abd;
1304			dsize = rr->rr_col[c].rc_size;
1305		}
1306
1307		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
1308		    raidz_syn_qr_abd);
1309	}
1310
1311	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
1312
1313	/*
1314	 * Copy shorter targets back to the original abd buffer
1315	 */
1316	if (ysize < xsize)
1317		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1318
1319	raidz_math_end();
1320
1321	if (ysize < xsize)
1322		abd_free(yabd);
1323
1324
1325	return ((1 << CODE_Q) | (1 << CODE_R));
1326}
1327
1328
1329/*
1330 * Generate P, Q, and R syndromes
1331 *
1332 * @xc		array of pointers to syndrome columns
1333 * @dc		data column (NULL if missing)
1334 * @tsize	size of syndrome columns
1335 * @dsize	size of data column (0 if missing)
1336 */
1337static void
1338raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
1339    const size_t dsize)
1340{
1341	v_t *x = (v_t *)c[TARGET_X];
1342	v_t *y = (v_t *)c[TARGET_Y];
1343	v_t *z = (v_t *)c[TARGET_Z];
1344	const v_t * const yend = y + (tsize / sizeof (v_t));
1345	const v_t *d = (const v_t *)dc;
1346	const v_t * const dend = d + (dsize / sizeof (v_t));
1347
1348	SYN_PQR_DEFINE();
1349
1350	MUL2_SETUP();
1351
1352	for (; d < dend;  d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
1353	    z += SYN_STRIDE) {
1354		LOAD(d, SYN_PQR_D);
1355		P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
1356		Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
1357		R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
1358	}
1359	for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
1360		Q_SYNDROME(SYN_PQR_X, y);
1361		R_SYNDROME(SYN_PQR_X, z);
1362	}
1363}
1364
1365
1366/*
1367 * Reconstruct data using PRQ parity and PQR syndromes
1368 *
1369 * @tc		syndrome/result columns
1370 * @tsize	size of syndrome/result columns
1371 * @c		parity columns
1372 * @mul		array of multiplication constants
1373 */
1374static void
1375raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
1376    const unsigned * const mul)
1377{
1378	v_t *x = (v_t *)t[TARGET_X];
1379	v_t *y = (v_t *)t[TARGET_Y];
1380	v_t *z = (v_t *)t[TARGET_Z];
1381	const v_t * const xend = x + (tsize / sizeof (v_t));
1382	const v_t *p = (v_t *)c[CODE_P];
1383	const v_t *q = (v_t *)c[CODE_Q];
1384	const v_t *r = (v_t *)c[CODE_R];
1385
1386	REC_PQR_DEFINE();
1387
1388	for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
1389	    z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
1390	    r += REC_PQR_STRIDE) {
1391		LOAD(x, REC_PQR_X);
1392		LOAD(y, REC_PQR_Y);
1393		LOAD(z, REC_PQR_Z);
1394
1395		XOR_ACC(p, REC_PQR_X);
1396		XOR_ACC(q, REC_PQR_Y);
1397		XOR_ACC(r, REC_PQR_Z);
1398
1399		/* Save Pxyz and Qxyz */
1400		COPY(REC_PQR_X, REC_PQR_XS);
1401		COPY(REC_PQR_Y, REC_PQR_YS);
1402
1403		/* Calc X */
1404		MUL(mul[MUL_PQR_XP], REC_PQR_X);	/* Xp = Pxyz * xp   */
1405		MUL(mul[MUL_PQR_XQ], REC_PQR_Y);	/* Xq = Qxyz * xq   */
1406		XOR(REC_PQR_Y, REC_PQR_X);
1407		MUL(mul[MUL_PQR_XR], REC_PQR_Z);	/* Xr = Rxyz * xr   */
1408		XOR(REC_PQR_Z, REC_PQR_X);		/* X = Xp + Xq + Xr */
1409		STORE(x, REC_PQR_X);
1410
1411		/* Calc Y */
1412		XOR(REC_PQR_X, REC_PQR_XS); 		/* Pyz = Pxyz + X */
1413		MUL(mul[MUL_PQR_YU], REC_PQR_X);  	/* Xq = X * upd_q */
1414		XOR(REC_PQR_X, REC_PQR_YS); 		/* Qyz = Qxyz + Xq */
1415		COPY(REC_PQR_XS, REC_PQR_X);		/* restore Pyz */
1416		MUL(mul[MUL_PQR_YP], REC_PQR_X);	/* Yp = Pyz * yp */
1417		MUL(mul[MUL_PQR_YQ], REC_PQR_YS);	/* Yq = Qyz * yq */
1418		XOR(REC_PQR_X, REC_PQR_YS); 		/* Y = Yp + Yq */
1419		STORE(y, REC_PQR_YS);
1420
1421		/* Calc Z */
1422		XOR(REC_PQR_XS, REC_PQR_YS);		/* Z = Pz = Pyz + Y */
1423		STORE(z, REC_PQR_YS);
1424	}
1425}
1426
1427
1428/*
1429 * Reconstruct three data columns using PQR parity
1430 *
1431 * @syn_method	raidz_syn_pqr_abd()
1432 * @rec_method	raidz_rec_pqr_abd()
1433 *
1434 * @rr		RAIDZ row
1435 * @tgtidx	array of missing data indexes
1436 */
1437static raidz_inline int
1438raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1439{
1440	size_t c;
1441	size_t dsize;
1442	abd_t *dabd;
1443	const size_t firstdc = rr->rr_firstdatacol;
1444	const size_t ncols = rr->rr_cols;
1445	const size_t x = tgtidx[TARGET_X];
1446	const size_t y = tgtidx[TARGET_Y];
1447	const size_t z = tgtidx[TARGET_Z];
1448	const size_t xsize = rr->rr_col[x].rc_size;
1449	const size_t ysize = rr->rr_col[y].rc_size;
1450	const size_t zsize = rr->rr_col[z].rc_size;
1451	abd_t *xabd = rr->rr_col[x].rc_abd;
1452	abd_t *yabd = rr->rr_col[y].rc_abd;
1453	abd_t *zabd = rr->rr_col[z].rc_abd;
1454	abd_t *tabds[] = { xabd, yabd, zabd };
1455	abd_t *cabds[] = {
1456		rr->rr_col[CODE_P].rc_abd,
1457		rr->rr_col[CODE_Q].rc_abd,
1458		rr->rr_col[CODE_R].rc_abd
1459	};
1460
1461	if (xabd == NULL)
1462		return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1463
1464	unsigned coeff[MUL_CNT];
1465	raidz_rec_pqr_coeff(rr, tgtidx, coeff);
1466
1467	/*
1468	 * Check if some of targets is shorter then others
1469	 * In this case, shorter target needs to be replaced with
1470	 * new buffer so that syndrome can be calculated.
1471	 */
1472	if (ysize < xsize) {
1473		yabd = abd_alloc(xsize, B_FALSE);
1474		tabds[1] = yabd;
1475	}
1476	if (zsize < xsize) {
1477		zabd = abd_alloc(xsize, B_FALSE);
1478		tabds[2] = zabd;
1479	}
1480
1481	raidz_math_begin();
1482
1483	/* Start with first data column if present */
1484	if (firstdc != x) {
1485		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1486		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1487		raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1488	} else {
1489		raidz_zero(xabd, xsize);
1490		raidz_zero(yabd, xsize);
1491		raidz_zero(zabd, xsize);
1492	}
1493
1494	/* generate q_syndrome */
1495	for (c = firstdc+1; c < ncols; c++) {
1496		if (c == x || c == y || c == z) {
1497			dabd = NULL;
1498			dsize = 0;
1499		} else {
1500			dabd = rr->rr_col[c].rc_abd;
1501			dsize = rr->rr_col[c].rc_size;
1502		}
1503
1504		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 3,
1505		    raidz_syn_pqr_abd);
1506	}
1507
1508	abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
1509
1510	/*
1511	 * Copy shorter targets back to the original abd buffer
1512	 */
1513	if (ysize < xsize)
1514		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1515	if (zsize < xsize)
1516		raidz_copy(rr->rr_col[z].rc_abd, zabd, 0, zsize);
1517
1518	raidz_math_end();
1519
1520	if (ysize < xsize)
1521		abd_free(yabd);
1522	if (zsize < xsize)
1523		abd_free(zabd);
1524
1525	return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1526}
1527
1528#endif /* _VDEV_RAIDZ_MATH_IMPL_H */
1529