1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
23 *           <romain.dolbeau@european-processor-initiative.eu>
24 */
25
26#include <sys/types.h>
27#include <sys/simd.h>
28
/*
 * On Linux builds every __asm(...) statement in this file expands to
 * __asm__ __volatile__(...).
 */
#ifdef __linux__
#define	__asm __asm__ __volatile__
#endif
32
/*
 * REG_CNT(list) evaluates to the number of comma-separated arguments it was
 * given (1 through 8).  The caller's list is followed by the descending
 * sequence 8..1 and the ninth entry of the combined list is selected, so
 * every additional argument shifts the next larger count into that slot.
 */
#define	_REG_CNT(a0, a1, a2, a3, a4, a5, a6, a7, COUNT, ...) COUNT
#define	REG_CNT(...) _REG_CNT(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1)
35
/*
 * VRn_(list) picks the entry at index n of its argument list and spells it
 * as the named inline-asm operand reference "%[w<entry>]".
 */
#define	VR0_(SEL, ...) "%[w" #SEL "]"
#define	VR1_(a0, SEL, ...) "%[w" #SEL "]"
#define	VR2_(a0, a1, SEL, ...) "%[w" #SEL "]"
#define	VR3_(a0, a1, a2, SEL, ...) "%[w" #SEL "]"
#define	VR4_(a0, a1, a2, a3, SEL, ...) "%[w" #SEL "]"
#define	VR5_(a0, a1, a2, a3, a4, SEL, ...) "%[w" #SEL "]"
#define	VR6_(a0, a1, a2, a3, a4, a5, SEL, ...) "%[w" #SEL "]"
#define	VR7_(a0, a1, a2, a3, a4, a5, a6, SEL, ...) "%[w" #SEL "]"

/*
 * VRn(list) is the user-facing selector.  The switch arms written for more
 * registers than the caller passed are never executed, but the compiler
 * still has to accept their operand lists, and an operand name may not
 * appear twice.  The caller's list is therefore padded with numbers that
 * are not used for anything else (36, 35, ... 30), so that an out-of-range
 * index resolves to a distinct filler name instead of running off the end
 * of the list.  The number doubles as the operand name suffix.
 */
#define	VR0(...) VR0_(__VA_ARGS__)
#define	VR1(...) VR1_(__VA_ARGS__)
#define	VR2(...) VR2_(__VA_ARGS__, 36)
#define	VR3(...) VR3_(__VA_ARGS__, 36, 35)
#define	VR4(...) VR4_(__VA_ARGS__, 36, 35, 34, 33)
#define	VR5(...) VR5_(__VA_ARGS__, 36, 35, 34, 33, 32)
#define	VR6(...) VR6_(__VA_ARGS__, 36, 35, 34, 33, 32, 31)
#define	VR7(...) VR7_(__VA_ARGS__, 36, 35, 34, 33, 32, 31, 30)

/* VR(NUM): operand reference for an explicitly numbered variable wNUM. */
#define	VR(NUM) "%[w" #NUM "]"
64
65#define	RVR0_(REG, ...) [w##REG] "v" (w##REG)
66#define	RVR1_(_1, REG, ...) [w##REG] "v" (w##REG)
67#define	RVR2_(_1, _2, REG, ...) [w##REG] "v" (w##REG)
68#define	RVR3_(_1, _2, _3, REG, ...) [w##REG] "v" (w##REG)
69#define	RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "v" (w##REG)
70#define	RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "v" (w##REG)
71#define	RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "v" (w##REG)
72#define	RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "v" (w##REG)
73
74#define	RVR0(r...) RVR0_(r)
75#define	RVR1(r...) RVR1_(r)
76#define	RVR2(r...) RVR2_(r, 36)
77#define	RVR3(r...) RVR3_(r, 36, 35)
78#define	RVR4(r...) RVR4_(r, 36, 35, 34, 33)
79#define	RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
80#define	RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
81#define	RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)
82
83#define	RVR(X) [w##X] "v" (w##X)
84
/*
 * WVRn_(list) picks the entry at index n of its argument list and emits the
 * write-only asm operand `[w<entry>] "=&v" (w<entry>)` for it.
 *
 * The operand is early-clobber ("&") on purpose.  Every asm statement in
 * this file consists of several instructions, and some of them (COPY, for
 * instance) write an output before the last vector input has been read.
 * Without "&" the compiler is allowed to place an output in the same
 * register as an input, on the assumption that all inputs are consumed
 * before any output is produced; that would let an early write destroy a
 * source that is still needed.  Users without vector inputs are unaffected
 * by the modifier.
 */
#define	WVR0_(REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR1_(_1, REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR2_(_1, _2, REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR3_(_1, _2, _3, REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=&v" (w##REG)
#define	WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=&v" (w##REG)

/*
 * WVRn(list): output operand for the n-th listed register.  The list is
 * padded with the filler numbers 36..30 so that an index beyond the
 * caller's list still resolves to a distinct name.
 */
#define	WVR0(r...) WVR0_(r)
#define	WVR1(r...) WVR1_(r)
#define	WVR2(r...) WVR2_(r, 36)
#define	WVR3(r...) WVR3_(r, 36, 35)
#define	WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define	WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define	WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define	WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* WVR(X): early-clobber output operand for the numbered variable wX. */
#define	WVR(X) [w##X] "=&v" (w##X)
104
/*
 * UVRn_(list) picks the entry at index n of its argument list and emits the
 * read-write, early-clobber asm operand `[w<entry>] "+&v" (w<entry>)`.
 */
#define	UVR0_(SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR1_(a0, SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR2_(a0, a1, SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR3_(a0, a1, a2, SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR4_(a0, a1, a2, a3, SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR5_(a0, a1, a2, a3, a4, SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR6_(a0, a1, a2, a3, a4, a5, SEL, ...) [w##SEL] "+&v" (w##SEL)
#define	UVR7_(a0, a1, a2, a3, a4, a5, a6, SEL, ...) [w##SEL] "+&v" (w##SEL)

/*
 * UVRn(list): updated (read and written) operand for the n-th listed
 * register.  The list is padded with the filler numbers 36..30 so that an
 * index beyond the caller's list still resolves to a distinct name.
 */
#define	UVR0(...) UVR0_(__VA_ARGS__)
#define	UVR1(...) UVR1_(__VA_ARGS__)
#define	UVR2(...) UVR2_(__VA_ARGS__, 36)
#define	UVR3(...) UVR3_(__VA_ARGS__, 36, 35)
#define	UVR4(...) UVR4_(__VA_ARGS__, 36, 35, 34, 33)
#define	UVR5(...) UVR5_(__VA_ARGS__, 36, 35, 34, 33, 32)
#define	UVR6(...) UVR6_(__VA_ARGS__, 36, 35, 34, 33, 32, 31)
#define	UVR7(...) UVR7_(__VA_ARGS__, 36, 35, 34, 33, 32, 31, 30)

/* UVR(NUM): updated operand for an explicitly numbered variable wNUM. */
#define	UVR(NUM) [w##NUM] "+&v" (w##NUM)
124
/*
 * Sub-list selectors: R_01(list) yields the first two entries of list and
 * R_23(list) the third and fourth.  R_23 pads the list with 1, 2, 3 so that
 * it still expands to two entries when fewer than four were supplied.
 */
#define	R_01(FIRST, SECOND, ...) FIRST, SECOND
#define	_R_23(a0, a1, THIRD, FOURTH, ...) THIRD, FOURTH
#define	R_23(...) _R_23(__VA_ARGS__, 1, 2, 3)
128
/*
 * Used as the default arm of the register-count switches in this file, i.e.
 * for register counts a macro has no asm sequence for.  Expands to ASSERT(0).
 */
#define	ZFS_ASM_BUG()	ASSERT(0)
130
/*
 * OFFSET(ptr, val): ptr advanced by val bytes, typed as unsigned char *.
 * Both arguments are parenthesized so that any expression (for example
 * `i << 4` or `cond ? a : b`) can be passed as val without being re-parsed
 * against the surrounding `+`.
 */
#define	OFFSET(ptr, val)	(((unsigned char *)(ptr)) + (val))
132
/*
 * Lookup tables read by _MULx2(): 4*256 rows of 16 bytes.  A multiplier
 * constant c selects the four rows 4*c .. 4*c+3.  Defined elsewhere.
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

/* Number of bytes in one v_t element. */
#define	ELEM_SIZE 16

/* One element: ELEM_SIZE bytes, aligned to ELEM_SIZE. */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
140
/*
 * XOR_ACC(src, r...): XOR consecutive 16-byte vectors read from src into the
 * listed wN variables, in place; the n-th listed register is XORed with the
 * vector loaded (lvx) from src + 16 * n.
 *
 * src	base address of the data to accumulate
 * r	2, 4 or 8 register numbers naming the wN accumulators
 *
 * Hard registers v18-v21 (v20-v21 in the two-register form) hold the loaded
 * data and are listed as clobbers.  The eight-register form reuses them for
 * the second group of four loads.  Any other register count reaches
 * ZFS_ASM_BUG().
 *
 * NOTE(review): the asm reads memory through src but declares neither a
 * memory input operand nor a "memory" clobber -- confirm callers do not
 * depend on the compiler ordering it against ordinary stores to src.
 */
#define	XOR_ACC(src, r...)					\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm(						\
		"lvx 21,0,%[SRC0]\n"				\
		"lvx 20,0,%[SRC1]\n"				\
		"lvx 19,0,%[SRC2]\n"				\
		"lvx 18,0,%[SRC3]\n"				\
		"vxor " VR0(r) "," VR0(r) ",21\n"		\
		"vxor " VR1(r) "," VR1(r) ",20\n"		\
		"vxor " VR2(r) "," VR2(r) ",19\n"		\
		"vxor " VR3(r) "," VR3(r) ",18\n"		\
		"lvx 21,0,%[SRC4]\n"				\
		"lvx 20,0,%[SRC5]\n"				\
		"lvx 19,0,%[SRC6]\n"				\
		"lvx 18,0,%[SRC7]\n"				\
		"vxor " VR4(r) "," VR4(r) ",21\n"		\
		"vxor " VR5(r) "," VR5(r) ",20\n"		\
		"vxor " VR6(r) "," VR6(r) ",19\n"		\
		"vxor " VR7(r) "," VR7(r) ",18\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r),	\
			UVR4(r), UVR5(r), UVR6(r), UVR7(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48))),			\
		[SRC4] "r" ((OFFSET(src, 64))),			\
		[SRC5] "r" ((OFFSET(src, 80))),			\
		[SRC6] "r" ((OFFSET(src, 96))),			\
		[SRC7] "r" ((OFFSET(src, 112)))			\
		:	"v18", "v19", "v20", "v21");		\
		break;						\
	case 4:							\
		__asm(						\
		"lvx 21,0,%[SRC0]\n"				\
		"lvx 20,0,%[SRC1]\n"				\
		"lvx 19,0,%[SRC2]\n"				\
		"lvx 18,0,%[SRC3]\n"				\
		"vxor " VR0(r) "," VR0(r) ",21\n"		\
		"vxor " VR1(r) "," VR1(r) ",20\n"		\
		"vxor " VR2(r) "," VR2(r) ",19\n"		\
		"vxor " VR3(r) "," VR3(r) ",18\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48)))			\
		:	"v18", "v19", "v20", "v21");		\
		break;						\
	case 2:							\
		__asm(						\
		"lvx 21,0,%[SRC0]\n"				\
		"lvx 20,0,%[SRC1]\n"				\
		"vxor " VR0(r) "," VR0(r) ",21\n"		\
		"vxor " VR1(r) "," VR1(r) ",20\n"		\
		:	UVR0(r), UVR1(r)			\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16)))			\
		:	"v20", "v21");				\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
206
/*
 * XOR(r...): XOR the first half of the register list into the second half.
 * With eight registers, list entries 4..7 ^= entries 0..3; with four,
 * entries 2..3 ^= entries 0..1.  The first half is read only.  Any other
 * register count reaches ZFS_ASM_BUG().
 */
#define	XOR(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm(						\
		"vxor " VR4(r) "," VR4(r) "," VR0(r) "\n"	\
		"vxor " VR5(r) "," VR5(r) "," VR1(r) "\n"	\
		"vxor " VR6(r) "," VR6(r) "," VR2(r) "\n"	\
		"vxor " VR7(r) "," VR7(r) "," VR3(r) "\n"	\
		:	UVR4(r), UVR5(r), UVR6(r), UVR7(r)	\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));	\
		break;						\
	case 4:							\
		__asm(						\
		"vxor " VR2(r) "," VR2(r) "," VR0(r) "\n"	\
		"vxor " VR3(r) "," VR3(r) "," VR1(r) "\n"	\
		:	UVR2(r), UVR3(r)			\
		:	RVR0(r), RVR1(r));			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
230
/*
 * ZERO(r...): clear every listed register by XORing it with itself.
 * Accepts 2, 4 or 8 registers; any other count reaches ZFS_ASM_BUG().
 * The registers are declared as outputs only: their previous contents do
 * not influence the result.
 */
#define	ZERO(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm(						\
		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		"vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		"vxor " VR4(r) "," VR4(r) "," VR4(r) "\n"	\
		"vxor " VR5(r) "," VR5(r) "," VR5(r) "\n"	\
		"vxor " VR6(r) "," VR6(r) "," VR6(r) "\n"	\
		"vxor " VR7(r) "," VR7(r) "," VR7(r) "\n"	\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),	\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r));	\
		break;						\
	case 4:							\
		__asm(						\
		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		"vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r));	\
		break;						\
	case 2:							\
		__asm(						\
		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		:	WVR0(r), WVR1(r));			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
265
/*
 * COPY(r...): copy the first half of the register list to the second half
 * (vor of a source with itself).  With eight registers, entries 4..7 receive
 * entries 0..3; with four, entries 2..3 receive entries 0..1.  Any other
 * register count reaches ZFS_ASM_BUG().
 *
 * Each destination is written before the later sources have been read, so
 * the output constraints supplied by WVRn must never let the compiler place
 * a destination in the same register as one of the sources.
 */
#define	COPY(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm(						\
		"vor " VR4(r) "," VR0(r) "," VR0(r) "\n"	\
		"vor " VR5(r) "," VR1(r) "," VR1(r) "\n"	\
		"vor " VR6(r) "," VR2(r) "," VR2(r) "\n"	\
		"vor " VR7(r) "," VR3(r) "," VR3(r) "\n"	\
		:	WVR4(r), WVR5(r), WVR6(r), WVR7(r)	\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));	\
		break;						\
	case 4:							\
		__asm(						\
		"vor " VR2(r) "," VR0(r) "," VR0(r) "\n"	\
		"vor " VR3(r) "," VR1(r) "," VR1(r) "\n"	\
		:	WVR2(r), WVR3(r)			\
		:	RVR0(r), RVR1(r));			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
289
/*
 * LOAD(src, r...): load consecutive 16-byte vectors from src into the listed
 * registers; the n-th listed register receives the vector read (lvx) from
 * src + 16 * n.
 *
 * src	base address of the data
 * r	2, 4 or 8 register numbers naming the destination wN variables
 *
 * Any other register count reaches ZFS_ASM_BUG().
 *
 * NOTE(review): the asm reads memory through src but declares neither a
 * memory input operand nor a "memory" clobber -- confirm callers do not
 * depend on the compiler ordering it against ordinary stores to src.
 */
#define	LOAD(src, r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm(						\
		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
		"lvx " VR2(r) " ,0,%[SRC2]\n"			\
		"lvx " VR3(r) " ,0,%[SRC3]\n"			\
		"lvx " VR4(r) " ,0,%[SRC4]\n"			\
		"lvx " VR5(r) " ,0,%[SRC5]\n"			\
		"lvx " VR6(r) " ,0,%[SRC6]\n"			\
		"lvx " VR7(r) " ,0,%[SRC7]\n"			\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),	\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48))),			\
		[SRC4] "r" ((OFFSET(src, 64))),			\
		[SRC5] "r" ((OFFSET(src, 80))),			\
		[SRC6] "r" ((OFFSET(src, 96))),			\
		[SRC7] "r" ((OFFSET(src, 112))));		\
		break;						\
	case 4:							\
		__asm(						\
		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
		"lvx " VR2(r) " ,0,%[SRC2]\n"			\
		"lvx " VR3(r) " ,0,%[SRC3]\n"			\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48))));		\
		break;						\
	case 2:							\
		__asm(						\
		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
		:	WVR0(r), WVR1(r)			\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))));		\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
338
/*
 * STORE(dst, r...): store the listed registers to consecutive 16-byte slots
 * at dst; the n-th listed register is written (stvx) to dst + 16 * n.
 *
 * dst	base address of the destination
 * r	2, 4 or 8 register numbers naming the source wN variables
 *
 * The asm has no outputs; the "memory" clobber tells the compiler that
 * memory is modified.  Any other register count reaches ZFS_ASM_BUG().
 */
#define	STORE(dst, r...)					\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm(						\
		"stvx " VR0(r) " ,0,%[DST0]\n"			\
		"stvx " VR1(r) " ,0,%[DST1]\n"			\
		"stvx " VR2(r) " ,0,%[DST2]\n"			\
		"stvx " VR3(r) " ,0,%[DST3]\n"			\
		"stvx " VR4(r) " ,0,%[DST4]\n"			\
		"stvx " VR5(r) " ,0,%[DST5]\n"			\
		"stvx " VR6(r) " ,0,%[DST6]\n"			\
		"stvx " VR7(r) " ,0,%[DST7]\n"			\
		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
		[DST1] "r" ((OFFSET(dst, 16))),			\
		[DST2] "r" ((OFFSET(dst, 32))),			\
		[DST3] "r" ((OFFSET(dst, 48))),			\
		[DST4] "r" ((OFFSET(dst, 64))),			\
		[DST5] "r" ((OFFSET(dst, 80))),			\
		[DST6] "r" ((OFFSET(dst, 96))),			\
		[DST7] "r" ((OFFSET(dst, 112))),		\
		RVR0(r), RVR1(r), RVR2(r), RVR3(r),		\
		RVR4(r), RVR5(r), RVR6(r), RVR7(r)		\
		:	"memory");				\
		break;						\
	case 4:							\
		__asm(						\
		"stvx " VR0(r) " ,0,%[DST0]\n"			\
		"stvx " VR1(r) " ,0,%[DST1]\n"			\
		"stvx " VR2(r) " ,0,%[DST2]\n"			\
		"stvx " VR3(r) " ,0,%[DST3]\n"			\
		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
		[DST1] "r" ((OFFSET(dst, 16))),			\
		[DST2] "r" ((OFFSET(dst, 32))),			\
		[DST3] "r" ((OFFSET(dst, 48))),			\
		RVR0(r), RVR1(r), RVR2(r), RVR3(r)		\
		: "memory");					\
		break;						\
	case 2:							\
		__asm(						\
		"stvx " VR0(r) " ,0,%[DST0]\n"			\
		"stvx " VR1(r) " ,0,%[DST1]\n"			\
		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
		[DST1] "r" ((OFFSET(dst, 16))),			\
		RVR0(r), RVR1(r) : "memory");			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
389
/*
 * Reference only: none of these names is used by the asm below, which
 * spells the numbers out directly.  Per the original note, going through the
 * macro does not work because GCC would later pick up the macro name rather
 * than its value.
 *
 * _00 and _1d are the wN numbers of the two constants MUL2_SETUP() builds
 * (w17 is cleared to zero, w16 gets 14 + 15 = 0x1d in every byte).
 * _temp0 and _temp1 are two of the scratch registers that MUL2() clobbers.
 */
#define	_00	"17"
#define	_1d	"16"
#define	_temp0	"19"
#define	_temp1	"18"
400
/*
 * MUL2_SETUP(): build the two constants that MUL2() takes as inputs.
 * w16 is set to 14 + 15 = 29 (0x1d) in every byte: both immediates are
 * splatted and then added byte-wise.  w17, which briefly held the splat of
 * 15, is then cleared by XORing it with itself.
 */
#define	MUL2_SETUP()						\
{								\
	__asm(							\
		"vspltisb " VR(16) ",14\n"			\
		"vspltisb " VR(17) ",15\n"			\
		"vaddubm " VR(16) "," VR(17) "," VR(16) "\n"	\
		"vxor " VR(17) "," VR(17) "," VR(17) "\n"	\
		:	WVR(16), WVR(17));			\
}
410
/*
 * MUL2(r...): per-byte doubling with conditional correction, applied in
 * place to 2 or 4 listed registers.  For every register x:
 *
 *	mask = (w17 > x)	signed byte compare (vcmpgtsb)
 *	mask &= w16
 *	x = x + x		byte-wise modulo add (vaddubm)
 *	x ^= mask
 *
 * w16 and w17 are read-only inputs and are expected to hold the constants
 * built by MUL2_SETUP() (0x1d bytes and zero), so the mask selects the bytes
 * whose top bit was set before the doubling.
 *
 * Hard registers v18-v21 (v18-v19 in the two-register form) hold the masks
 * and are listed as clobbers.  Any other register count reaches
 * ZFS_ASM_BUG().
 */
#define	MUL2(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 4:							\
		__asm(						\
		"vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		"vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		"vcmpgtsb 21," VR(17) "," VR2(r) "\n"		\
		"vcmpgtsb 20," VR(17) "," VR3(r) "\n"		\
		"vand 19,19," VR(16) "\n"			\
		"vand 18,18," VR(16) "\n"			\
		"vand 21,21," VR(16) "\n"			\
		"vand 20,20," VR(16) "\n"			\
		"vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		"vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		"vxor " VR0(r) ",19," VR0(r) "\n"		\
		"vxor " VR1(r) ",18," VR1(r) "\n"		\
		"vxor " VR2(r) ",21," VR2(r) "\n"		\
		"vxor " VR3(r) ",20," VR3(r) "\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)	\
		:	RVR(17), RVR(16)			\
		:	"v18", "v19", "v20", "v21");		\
		break;						\
	case 2:							\
		__asm(						\
		"vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		"vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		"vand 19,19," VR(16) "\n"			\
		"vand 18,18," VR(16) "\n"			\
		"vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vxor " VR0(r) ",19," VR0(r) "\n"		\
		"vxor " VR1(r) ",18," VR1(r) "\n"		\
		:	UVR0(r), UVR1(r)			\
		:	RVR(17), RVR(16)			\
		:	"v18", "v19");				\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
454
/* MUL4(r...): apply MUL2() to the listed registers twice in a row. */
#define	MUL4(r...)						\
{								\
	MUL2(r);						\
	MUL2(r);						\
}
460
/*
 * Reference only: none of these names is used by the asm below, which
 * spells the numbers out directly.  Per the original note, going through the
 * macro does not work because GCC would later pick up the macro name rather
 * than its value.
 *
 * The values are the hard vector registers that _MULx2() uses as scratch
 * and lists as clobbers (actual registers here, not wN variables).
 * Register 15 appears twice because _MULx2() first loads it with a splat
 * constant and later reuses it for a table lookup.
 */
#define	_0f		"15"
#define	_a_save		"14"
#define	_b_save		"13"
#define	_lt_mod_a	"12"
#define	_lt_clmul_a	"11"
#define	_lt_mod_b	"10"
#define	_lt_clmul_b	"15"
476
/*
 * _MULx2(c, r...): table-driven multiply of exactly two registers by the
 * constant c, using rows 4*c .. 4*c+3 of gf_clmul_mod_lt.
 *
 * For each register the low nibble of every byte is saved (vand with a
 * splat of 15) and the register is shifted right by four per byte (vsrab).
 * The shifted value is the vperm selector into rows 4*c+0 and 4*c+1, the
 * saved low nibble is the selector into rows 4*c+2 and 4*c+3, and the four
 * lookups are XORed together.  The result replaces the register contents.
 *
 * c	multiplier constant; selects the table rows
 * r	exactly two register numbers, updated in place
 *
 * Hard registers v10-v15 are scratch and listed as clobbers.  Any register
 * count other than two reaches ZFS_ASM_BUG().
 */
#define	_MULx2(c, r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 2:							\
		__asm(						\
		/* lts for upper part */			\
		"vspltisb 15,15\n"				\
		"lvx 10,0,%[lt0]\n"				\
		"lvx 11,0,%[lt1]\n"				\
		/* upper part */				\
		"vand 14," VR0(r) ",15\n"			\
		"vand 13," VR1(r) ",15\n"			\
		"vspltisb 15,4\n"				\
		"vsrab " VR0(r) "," VR0(r) ",15\n"		\
		"vsrab " VR1(r) "," VR1(r) ",15\n"		\
								\
		"vperm 12,10,10," VR0(r) "\n"			\
		"vperm 10,10,10," VR1(r) "\n"			\
		"vperm 15,11,11," VR0(r) "\n"			\
		"vperm 11,11,11," VR1(r) "\n"			\
								\
		"vxor " VR0(r) ",15,12\n"			\
		"vxor " VR1(r) ",11,10\n"			\
		/* lts for lower part */			\
		"lvx 10,0,%[lt2]\n"				\
		"lvx 15,0,%[lt3]\n"				\
		/* lower part */				\
		"vperm 12,10,10,14\n"				\
		"vperm 10,10,10,13\n"				\
		"vperm 11,15,15,14\n"				\
		"vperm 15,15,15,13\n"				\
								\
		"vxor " VR0(r) "," VR0(r) ",12\n"		\
		"vxor " VR1(r) "," VR1(r) ",10\n"		\
		"vxor " VR0(r) "," VR0(r) ",11\n"		\
		"vxor " VR1(r) "," VR1(r) ",15\n"		\
		: UVR0(r), UVR1(r)				\
		: [lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])),	\
		[lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])),	\
		[lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])),	\
		[lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0]))	\
		: "v10", "v11", "v12", "v13", "v14", "v15");	\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
524
/*
 * MUL(c, r...): multiply 2 or 4 listed registers by the constant c, two
 * registers at a time through _MULx2().  With four registers the pair at
 * list positions 2 and 3 is processed first, then the pair at positions 0
 * and 1.  Any other register count reaches ZFS_ASM_BUG().
 */
#define	MUL(c, r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 4:							\
		_MULx2(c, R_23(r));				\
		_MULx2(c, R_01(r));				\
		break;						\
	case 2:							\
		_MULx2(c, R_01(r));				\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
539
/*
 * Bracket for a run of the vector macros above: these map directly onto
 * kfpu_begin() and kfpu_end().
 */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()	kfpu_end()
542
/*
 * Declarations of the wN variables that the asm macros in this file bind as
 * operands.  Every wN is a 16-byte vector of unsigned char.
 *
 * GEN_X_VAR(num, reg) emits the declaration of one variable, w<num>.  In
 * the disabled "#if 0" flavour (marked "Overkill..." by the author) the
 * variable is additionally tied to a register with `register ... asm("reg")`;
 * w32 through w38 all name register "31" there.  The active flavour ignores
 * reg and declares plain variables.
 *
 * Each GEN_X_DEFINE_*() macro expands to complete declarations, trailing
 * semicolons included, and is used without a semicolon of its own.
 */
/* Overkill... */
#if 0 // defined(_KERNEL)
#define	GEN_X_VAR(num, reg)	\
register unsigned char w##num asm(#reg) __attribute__((vector_size(16)));
#else
#define	GEN_X_VAR(num, reg)	\
	unsigned char w##num __attribute__((vector_size(16)));
#endif

#define	GEN_X_DEFINE_0_3()	\
	GEN_X_VAR(0, 0)		\
	GEN_X_VAR(1, 1)		\
	GEN_X_VAR(2, 2)		\
	GEN_X_VAR(3, 3)
#define	GEN_X_DEFINE_4_5()	\
	GEN_X_VAR(4, 4)		\
	GEN_X_VAR(5, 5)
#define	GEN_X_DEFINE_6_7()	\
	GEN_X_VAR(6, 6)		\
	GEN_X_VAR(7, 7)
#define	GEN_X_DEFINE_8_9()	\
	GEN_X_VAR(8, 8)		\
	GEN_X_VAR(9, 9)
#define	GEN_X_DEFINE_10_11()	\
	GEN_X_VAR(10, 10)	\
	GEN_X_VAR(11, 11)
#define	GEN_X_DEFINE_12_15()	\
	GEN_X_VAR(12, 12)	\
	GEN_X_VAR(13, 13)	\
	GEN_X_VAR(14, 14)	\
	GEN_X_VAR(15, 15)
#define	GEN_X_DEFINE_16()	\
	GEN_X_VAR(16, 16)
#define	GEN_X_DEFINE_17()	\
	GEN_X_VAR(17, 17)
#define	GEN_X_DEFINE_18_21()	\
	GEN_X_VAR(18, 18)	\
	GEN_X_VAR(19, 19)	\
	GEN_X_VAR(20, 20)	\
	GEN_X_VAR(21, 21)
#define	GEN_X_DEFINE_22_23()	\
	GEN_X_VAR(22, 22)	\
	GEN_X_VAR(23, 23)
#define	GEN_X_DEFINE_24_27()	\
	GEN_X_VAR(24, 24)	\
	GEN_X_VAR(25, 25)	\
	GEN_X_VAR(26, 26)	\
	GEN_X_VAR(27, 27)
#define	GEN_X_DEFINE_28_30()	\
	GEN_X_VAR(28, 28)	\
	GEN_X_VAR(29, 29)	\
	GEN_X_VAR(30, 30)
#define	GEN_X_DEFINE_31()	\
	GEN_X_VAR(31, 31)
#define	GEN_X_DEFINE_32()	\
	GEN_X_VAR(32, 31)
#define	GEN_X_DEFINE_33_36()	\
	GEN_X_VAR(33, 31)	\
	GEN_X_VAR(34, 31)	\
	GEN_X_VAR(35, 31)	\
	GEN_X_VAR(36, 31)
#define	GEN_X_DEFINE_37_38()	\
	GEN_X_VAR(37, 31)	\
	GEN_X_VAR(38, 31)

/* Declares w0 through w38 in one go. */
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()	\
	GEN_X_DEFINE_37_38()
691