1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
23 *           <romain.dolbeau@european-processor-initiative.eu>
24 */
25
26#include <sys/types.h>
27#include <sys/simd.h>
28
/*
 * Count the (up to 8) variadic arguments: REG_CNT(a,b,c) expands to 3.
 * _REG_CNT pads the list with a descending sequence so that the actual
 * argument count lands in parameter slot N.
 * NOTE(review): _REG_CNT starts with underscore + uppercase, a name
 * reserved for the implementation; kept as-is for compatibility.
 */
#define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
31
/*
 * VRn_ picks the (n+1)-th argument as REG and emits the inline-asm
 * operand reference string "%[wREG]"; trailing arguments are ignored.
 */
#define	VR0_(REG, ...) "%[w"#REG"]"
#define	VR1_(_1, REG, ...) "%[w"#REG"]"
#define	VR2_(_1, _2, REG, ...) "%[w"#REG"]"
#define	VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
#define	VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
#define	VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
#define	VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
#define	VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"
40
41/*
42 * Here we need registers not used otherwise.
43 * They will be used in unused ASM for the case
44 * with more registers than required... but GCC
45 * will still need to make sure the constraints
46 * are correct, and duplicate constraints are illegal
47 * ... and we use the "register" number as a name
48 */
49
50#define	VR0(r...) VR0_(r)
51#define	VR1(r...) VR1_(r)
52#define	VR2(r...) VR2_(r, 36)
53#define	VR3(r...) VR3_(r, 36, 35)
54#define	VR4(r...) VR4_(r, 36, 35, 34, 33)
55#define	VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
56#define	VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
57#define	VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)
58
59#define	VR(X) "%[w"#X"]"
60
/*
 * RVRn(r...) builds the *input* operand entry [wREG] "v" (wREG) for the
 * n-th register of list r ("v" = AltiVec vector register constraint).
 * Padding values as for VRn above.
 */
#define	RVR0_(REG, ...) [w##REG] "v" (w##REG)
#define	RVR1_(_1, REG, ...) [w##REG] "v" (w##REG)
#define	RVR2_(_1, _2, REG, ...) [w##REG] "v" (w##REG)
#define	RVR3_(_1, _2, _3, REG, ...) [w##REG] "v" (w##REG)
#define	RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "v" (w##REG)
#define	RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "v" (w##REG)
#define	RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "v" (w##REG)
#define	RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "v" (w##REG)

#define	RVR0(r...) RVR0_(r)
#define	RVR1(r...) RVR1_(r)
#define	RVR2(r...) RVR2_(r, 36)
#define	RVR3(r...) RVR3_(r, 36, 35)
#define	RVR4(r...) RVR4_(r, 36, 35, 34, 33)
#define	RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
#define	RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
#define	RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Input operand entry for a literal register-variable number X. */
#define	RVR(X) [w##X] "v" (w##X)
80
/*
 * WVRn(r...) builds the *write-only* output operand entry
 * [wREG] "=v" (wREG) for the n-th register of list r.
 */
#define	WVR0_(REG, ...) [w##REG] "=v" (w##REG)
#define	WVR1_(_1, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR2_(_1, _2, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR3_(_1, _2, _3, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=v" (w##REG)

#define	WVR0(r...) WVR0_(r)
#define	WVR1(r...) WVR1_(r)
#define	WVR2(r...) WVR2_(r, 36)
#define	WVR3(r...) WVR3_(r, 36, 35)
#define	WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define	WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define	WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define	WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Write-only output operand entry for a literal register number X. */
#define	WVR(X) [w##X] "=v" (w##X)
100
/*
 * UVRn(r...) builds the *read-write, early-clobber* operand entry
 * [wREG] "+&v" (wREG) for the n-th register of list r ("+" = updated
 * in place, "&" = may be written before all inputs are consumed).
 */
#define	UVR0_(REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR1_(_1, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&v" (w##REG)

#define	UVR0(r...) UVR0_(r)
#define	UVR1(r...) UVR1_(r)
#define	UVR2(r...) UVR2_(r, 36)
#define	UVR3(r...) UVR3_(r, 36, 35)
#define	UVR4(r...) UVR4_(r, 36, 35, 34, 33)
#define	UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
#define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
#define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Read-write early-clobber operand entry for a literal register X. */
#define	UVR(X) [w##X] "+&v" (w##X)
120
/*
 * R_01(r...) yields the first two registers of the list;
 * R_23(r...) yields the third and fourth (padded with 1,2,3 so that
 * shorter lists still expand without error).
 */
#define	R_01(REG1, REG2, ...) REG1, REG2
#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define	R_23(REG...) _R_23(REG, 1, 2, 3)
124
/* Unsupported register count in a dispatch switch below: assert-fail. */
#define	ZFS_ASM_BUG()	ASSERT(0)
126
/*
 * Byte-offset pointer arithmetic: ((unsigned char *)ptr) + val.
 * Both arguments are parenthesized so the expansion stays correct for
 * any expression argument (previously an unparenthesized "val" such as
 * a conditional expression would parse as (ptr + cond) ? a : b).
 */
#define	OFFSET(ptr, val)	(((unsigned char *)(ptr))+(val))
128
/*
 * Lookup tables for multiplication by a constant in GF(2^8): four
 * 16-byte tables per constant (low/high nibble of the two product
 * halves).  Defined elsewhere.
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

/* Size in bytes of one RAID-Z math element (one AltiVec vector). */
#define	ELEM_SIZE 16

/* One 16-byte element, aligned for vector loads/stores (lvx/stvx). */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
136
/*
 * XOR_ACC(src, r...): XOR-accumulate 2, 4 or 8 consecutive 16-byte
 * vectors loaded from src (at offsets 0, 16, 32, ...) into the register
 * variables r.  v18-v21 are used as load scratch and listed as
 * clobbered.  Unsupported register counts hit ZFS_ASM_BUG().
 */
#define	XOR_ACC(src, r...)					\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm__ __volatile__(				\
		"lvx 21,0,%[SRC0]\n"				\
		"lvx 20,0,%[SRC1]\n"				\
		"lvx 19,0,%[SRC2]\n"				\
		"lvx 18,0,%[SRC3]\n"				\
		"vxor " VR0(r) "," VR0(r) ",21\n"		\
		"vxor " VR1(r) "," VR1(r) ",20\n"		\
		"vxor " VR2(r) "," VR2(r) ",19\n"		\
		"vxor " VR3(r) "," VR3(r) ",18\n"		\
		"lvx 21,0,%[SRC4]\n"				\
		"lvx 20,0,%[SRC5]\n"				\
		"lvx 19,0,%[SRC6]\n"				\
		"lvx 18,0,%[SRC7]\n"				\
		"vxor " VR4(r) "," VR4(r) ",21\n"		\
		"vxor " VR5(r) "," VR5(r) ",20\n"		\
		"vxor " VR6(r) "," VR6(r) ",19\n"		\
		"vxor " VR7(r) "," VR7(r) ",18\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r),	\
			UVR4(r), UVR5(r), UVR6(r), UVR7(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48))),			\
		[SRC4] "r" ((OFFSET(src, 64))),			\
		[SRC5] "r" ((OFFSET(src, 80))),			\
		[SRC6] "r" ((OFFSET(src, 96))),			\
		[SRC7] "r" ((OFFSET(src, 112)))			\
		:	"v18", "v19", "v20", "v21");		\
		break;						\
	case 4:							\
		__asm__ __volatile__(				\
		"lvx 21,0,%[SRC0]\n"				\
		"lvx 20,0,%[SRC1]\n"				\
		"lvx 19,0,%[SRC2]\n"				\
		"lvx 18,0,%[SRC3]\n"				\
		"vxor " VR0(r) "," VR0(r) ",21\n"		\
		"vxor " VR1(r) "," VR1(r) ",20\n"		\
		"vxor " VR2(r) "," VR2(r) ",19\n"		\
		"vxor " VR3(r) "," VR3(r) ",18\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48)))			\
		:	"v18", "v19", "v20", "v21");		\
		break;						\
	case 2:							\
		__asm__ __volatile__(				\
		"lvx 21,0,%[SRC0]\n"				\
		"lvx 20,0,%[SRC1]\n"				\
		"vxor " VR0(r) "," VR0(r) ",21\n"		\
		"vxor " VR1(r) "," VR1(r) ",20\n"		\
		:	UVR0(r), UVR1(r)			\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16)))			\
		:	"v20", "v21");				\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
202
/*
 * XOR(r...): XOR the first half of the register list into the second
 * half; with 2N registers, r[N+i] ^= r[i] for i in [0, N).
 */
#define	XOR(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm__ __volatile__(				\
		"vxor " VR4(r) "," VR4(r) "," VR0(r) "\n"	\
		"vxor " VR5(r) "," VR5(r) "," VR1(r) "\n"	\
		"vxor " VR6(r) "," VR6(r) "," VR2(r) "\n"	\
		"vxor " VR7(r) "," VR7(r) "," VR3(r) "\n"	\
		:	UVR4(r), UVR5(r), UVR6(r), UVR7(r)	\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));	\
		break;						\
	case 4:							\
		__asm__ __volatile__(				\
		"vxor " VR2(r) "," VR2(r) "," VR0(r) "\n"	\
		"vxor " VR3(r) "," VR3(r) "," VR1(r) "\n"	\
		:	UVR2(r), UVR3(r)			\
		:	RVR0(r), RVR1(r));			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
226
/*
 * ZERO(r...): clear 2, 4 or 8 vector registers by XORing each register
 * with itself (write-only outputs, no inputs needed).
 */
#define	ZERO(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm__ __volatile__(				\
		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		"vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		"vxor " VR4(r) "," VR4(r) "," VR4(r) "\n"	\
		"vxor " VR5(r) "," VR5(r) "," VR5(r) "\n"	\
		"vxor " VR6(r) "," VR6(r) "," VR6(r) "\n"	\
		"vxor " VR7(r) "," VR7(r) "," VR7(r) "\n"	\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),	\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r));	\
		break;						\
	case 4:							\
		__asm__ __volatile__(				\
		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		"vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r));	\
		break;						\
	case 2:							\
		__asm__ __volatile__(				\
		"vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		:	WVR0(r), WVR1(r));			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
261
/*
 * COPY(r...): copy the first half of the register list into the second
 * half (r[N+i] = r[i]) using "vor dst,src,src" as a register move.
 */
#define	COPY(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm__ __volatile__(				\
		"vor " VR4(r) "," VR0(r) "," VR0(r) "\n"	\
		"vor " VR5(r) "," VR1(r) "," VR1(r) "\n"	\
		"vor " VR6(r) "," VR2(r) "," VR2(r) "\n"	\
		"vor " VR7(r) "," VR3(r) "," VR3(r) "\n"	\
		:	WVR4(r), WVR5(r), WVR6(r), WVR7(r)	\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));	\
		break;						\
	case 4:							\
		__asm__ __volatile__(				\
		"vor " VR2(r) "," VR0(r) "," VR0(r) "\n"	\
		"vor " VR3(r) "," VR1(r) "," VR1(r) "\n"	\
		:	WVR2(r), WVR3(r)			\
		:	RVR0(r), RVR1(r));			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
285
/*
 * LOAD(src, r...): load 2, 4 or 8 16-byte vectors from consecutive
 * 16-byte slots of src into the register variables r.
 */
#define	LOAD(src, r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm__ __volatile__(				\
		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
		"lvx " VR2(r) " ,0,%[SRC2]\n"			\
		"lvx " VR3(r) " ,0,%[SRC3]\n"			\
		"lvx " VR4(r) " ,0,%[SRC4]\n"			\
		"lvx " VR5(r) " ,0,%[SRC5]\n"			\
		"lvx " VR6(r) " ,0,%[SRC6]\n"			\
		"lvx " VR7(r) " ,0,%[SRC7]\n"			\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),	\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48))),			\
		[SRC4] "r" ((OFFSET(src, 64))),			\
		[SRC5] "r" ((OFFSET(src, 80))),			\
		[SRC6] "r" ((OFFSET(src, 96))),			\
		[SRC7] "r" ((OFFSET(src, 112))));		\
		break;						\
	case 4:							\
		__asm__ __volatile__(				\
		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
		"lvx " VR2(r) " ,0,%[SRC2]\n"			\
		"lvx " VR3(r) " ,0,%[SRC3]\n"			\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r)	\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))),			\
		[SRC2] "r" ((OFFSET(src, 32))),			\
		[SRC3] "r" ((OFFSET(src, 48))));		\
		break;						\
	case 2:							\
		__asm__ __volatile__(				\
		"lvx " VR0(r) " ,0,%[SRC0]\n"			\
		"lvx " VR1(r) " ,0,%[SRC1]\n"			\
		:	WVR0(r), WVR1(r)			\
		:	[SRC0] "r" ((OFFSET(src, 0))),		\
		[SRC1] "r" ((OFFSET(src, 16))));		\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
334
/*
 * STORE(dst, r...): store 2, 4 or 8 vector registers to consecutive
 * 16-byte slots of dst.  "memory" clobber tells GCC the stores may
 * touch memory it knows about.
 */
#define	STORE(dst, r...)					\
{								\
	switch (REG_CNT(r)) {					\
	case 8:							\
		__asm__ __volatile__(				\
		"stvx " VR0(r) " ,0,%[DST0]\n"			\
		"stvx " VR1(r) " ,0,%[DST1]\n"			\
		"stvx " VR2(r) " ,0,%[DST2]\n"			\
		"stvx " VR3(r) " ,0,%[DST3]\n"			\
		"stvx " VR4(r) " ,0,%[DST4]\n"			\
		"stvx " VR5(r) " ,0,%[DST5]\n"			\
		"stvx " VR6(r) " ,0,%[DST6]\n"			\
		"stvx " VR7(r) " ,0,%[DST7]\n"			\
		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
		[DST1] "r" ((OFFSET(dst, 16))),			\
		[DST2] "r" ((OFFSET(dst, 32))),			\
		[DST3] "r" ((OFFSET(dst, 48))),			\
		[DST4] "r" ((OFFSET(dst, 64))),			\
		[DST5] "r" ((OFFSET(dst, 80))),			\
		[DST6] "r" ((OFFSET(dst, 96))),			\
		[DST7] "r" ((OFFSET(dst, 112))),		\
		RVR0(r), RVR1(r), RVR2(r), RVR3(r),		\
		RVR4(r), RVR5(r), RVR6(r), RVR7(r)		\
		:	"memory");				\
		break;						\
	case 4:							\
		__asm__ __volatile__(				\
		"stvx " VR0(r) " ,0,%[DST0]\n"			\
		"stvx " VR1(r) " ,0,%[DST1]\n"			\
		"stvx " VR2(r) " ,0,%[DST2]\n"			\
		"stvx " VR3(r) " ,0,%[DST3]\n"			\
		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
		[DST1] "r" ((OFFSET(dst, 16))),			\
		[DST2] "r" ((OFFSET(dst, 32))),			\
		[DST3] "r" ((OFFSET(dst, 48))),			\
		RVR0(r), RVR1(r), RVR2(r), RVR3(r)		\
		: "memory");					\
		break;						\
	case 2:							\
		__asm__ __volatile__(				\
		"stvx " VR0(r) " ,0,%[DST0]\n"			\
		"stvx " VR1(r) " ,0,%[DST1]\n"			\
		: :	[DST0] "r" ((OFFSET(dst, 0))),		\
		[DST1] "r" ((OFFSET(dst, 16))),			\
		RVR0(r), RVR1(r) : "memory");			\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
385
386/*
387 * Unfortunately cannot use the macro, because GCC
388 * will try to use the macro name and not value
389 * later on...
390 * Kept as a reference to what a numbered variable is
391 */
392#define	_00	"17"
393#define	_1d	"16"
394#define	_temp0	"19"
395#define	_temp1	"18"
396
397#define	MUL2_SETUP()						\
398{								\
399	__asm__ __volatile__(					\
400		"vspltisb " VR(16) ",14\n"			\
401		"vspltisb " VR(17) ",15\n"			\
402		"vaddubm " VR(16) "," VR(17) "," VR(16) "\n"	\
403		"vxor " VR(17) "," VR(17) "," VR(17) "\n"	\
404		:	WVR(16), WVR(17));			\
405}
406
/*
 * MUL2(r...): multiply each byte of 2 or 4 vector registers by 2 in
 * GF(2^8).  Per byte: vcmpgtsb against zero (VR17) yields all-ones
 * where the sign bit is set; AND with 0x1d (VR16) forms the reduction
 * mask; vaddubm x,x,x is the byte-wise left shift; the final vxor
 * applies the reduction.  Requires MUL2_SETUP() to have run; clobbers
 * the listed scratch registers v18-v21.
 */
#define	MUL2(r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 4:							\
		__asm__ __volatile__(				\
		"vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		"vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		"vcmpgtsb 21," VR(17) "," VR2(r) "\n"		\
		"vcmpgtsb 20," VR(17) "," VR3(r) "\n"		\
		"vand 19,19," VR(16) "\n"			\
		"vand 18,18," VR(16) "\n"			\
		"vand 21,21," VR(16) "\n"			\
		"vand 20,20," VR(16) "\n"			\
		"vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		"vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		"vxor " VR0(r) ",19," VR0(r) "\n"		\
		"vxor " VR1(r) ",18," VR1(r) "\n"		\
		"vxor " VR2(r) ",21," VR2(r) "\n"		\
		"vxor " VR3(r) ",20," VR3(r) "\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)	\
		:	RVR(17), RVR(16)			\
		:	"v18", "v19", "v20", "v21");		\
		break;						\
	case 2:							\
		__asm__ __volatile__(				\
		"vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		"vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		"vand 19,19," VR(16) "\n"			\
		"vand 18,18," VR(16) "\n"			\
		"vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		"vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		"vxor " VR0(r) ",19," VR0(r) "\n"		\
		"vxor " VR1(r) ",18," VR1(r) "\n"		\
		:	UVR0(r), UVR1(r)			\
		:	RVR(17), RVR(16)			\
		:	"v18", "v19");				\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
450
/* MUL4(r...): multiply by 4 in GF(2^8) = two applications of MUL2. */
#define	MUL4(r...)						\
{								\
	MUL2(r);						\
	MUL2(r);						\
}
456
457/*
458 * Unfortunately cannot use the macro, because GCC
459 * will try to use the macro name and not value
460 * later on...
461 * Kept as a reference to what a register is
462 * (here we're using actual registers for the
463 * clobbered ones)
464 */
465#define	_0f		"15"
466#define	_a_save		"14"
467#define	_b_save		"13"
468#define	_lt_mod_a	"12"
469#define	_lt_clmul_a	"11"
470#define	_lt_mod_b	"10"
471#define	_lt_clmul_b	"15"
472
473#define	_MULx2(c, r...)						\
474{								\
475	switch (REG_CNT(r)) {					\
476	case 2:							\
477		__asm__ __volatile__(				\
478		/* lts for upper part */			\
479		"vspltisb 15,15\n"				\
480		"lvx 10,0,%[lt0]\n"				\
481		"lvx 11,0,%[lt1]\n"				\
482		/* upper part */				\
483		"vand 14," VR0(r) ",15\n"			\
484		"vand 13," VR1(r) ",15\n"			\
485		"vspltisb 15,4\n"				\
486		"vsrab " VR0(r) "," VR0(r) ",15\n"		\
487		"vsrab " VR1(r) "," VR1(r) ",15\n"		\
488								\
489		"vperm 12,10,10," VR0(r) "\n"			\
490		"vperm 10,10,10," VR1(r) "\n"			\
491		"vperm 15,11,11," VR0(r) "\n"			\
492		"vperm 11,11,11," VR1(r) "\n"			\
493								\
494		"vxor " VR0(r) ",15,12\n"			\
495		"vxor " VR1(r) ",11,10\n"			\
496		/* lts for lower part */			\
497		"lvx 10,0,%[lt2]\n"				\
498		"lvx 15,0,%[lt3]\n"				\
499		/* lower part */				\
500		"vperm 12,10,10,14\n"				\
501		"vperm 10,10,10,13\n"				\
502		"vperm 11,15,15,14\n"				\
503		"vperm 15,15,15,13\n"				\
504								\
505		"vxor " VR0(r) "," VR0(r) ",12\n"		\
506		"vxor " VR1(r) "," VR1(r) ",10\n"		\
507		"vxor " VR0(r) "," VR0(r) ",11\n"		\
508		"vxor " VR1(r) "," VR1(r) ",15\n"		\
509		: UVR0(r), UVR1(r)				\
510		: [lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])),	\
511		[lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])),	\
512		[lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])),	\
513		[lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0]))	\
514		: "v10", "v11", "v12", "v13", "v14", "v15");	\
515		break;						\
516	default:						\
517		ZFS_ASM_BUG();					\
518	}							\
519}
520
/*
 * MUL(c, r...): multiply 2 or 4 vector registers by the constant c in
 * GF(2^8) by dispatching register pairs to _MULx2.
 */
#define	MUL(c, r...)						\
{								\
	switch (REG_CNT(r)) {					\
	case 4:							\
		_MULx2(c, R_23(r));				\
		_MULx2(c, R_01(r));				\
		break;						\
	case 2:							\
		_MULx2(c, R_01(r));				\
		break;						\
	default:						\
		ZFS_ASM_BUG();					\
	}							\
}
535
/* Bracket raidz math with kernel FPU/vector context save/restore. */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()	kfpu_end()
538
/*
 * Per-function vector variable definitions w0..w38.
 *
 * The first branch (disabled with "#if 0" -- "Overkill...") pinned each
 * wN to a fixed AltiVec register via "register ... asm(N)" and is kept
 * only as a reference; note that in that dead branch w32-w38 all alias
 * register 31.  The live branch declares plain 16-byte vector variables
 * and lets GCC do the register allocation.
 */
#if 0 // defined(_KERNEL)
#define	GEN_X_DEFINE_0_3()	\
register unsigned char w0 asm("0") __attribute__((vector_size(16)));	\
register unsigned char w1 asm("1") __attribute__((vector_size(16)));	\
register unsigned char w2 asm("2") __attribute__((vector_size(16)));	\
register unsigned char w3 asm("3") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5()	\
register unsigned char w4 asm("4") __attribute__((vector_size(16)));	\
register unsigned char w5 asm("5") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7()	\
register unsigned char w6 asm("6") __attribute__((vector_size(16)));	\
register unsigned char w7 asm("7") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9()	\
register unsigned char w8 asm("8") __attribute__((vector_size(16)));	\
register unsigned char w9 asm("9") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11()	\
register unsigned char w10 asm("10") __attribute__((vector_size(16)));	\
register unsigned char w11 asm("11") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15()	\
register unsigned char w12 asm("12") __attribute__((vector_size(16)));	\
register unsigned char w13 asm("13") __attribute__((vector_size(16)));	\
register unsigned char w14 asm("14") __attribute__((vector_size(16)));	\
register unsigned char w15 asm("15") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16()	\
register unsigned char w16 asm("16") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17()	\
register unsigned char w17 asm("17") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21()	\
register unsigned char w18 asm("18") __attribute__((vector_size(16)));	\
register unsigned char w19 asm("19") __attribute__((vector_size(16)));	\
register unsigned char w20 asm("20") __attribute__((vector_size(16)));	\
register unsigned char w21 asm("21") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23()	\
register unsigned char w22 asm("22") __attribute__((vector_size(16)));	\
register unsigned char w23 asm("23") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27()	\
register unsigned char w24 asm("24") __attribute__((vector_size(16)));	\
register unsigned char w25 asm("25") __attribute__((vector_size(16)));	\
register unsigned char w26 asm("26") __attribute__((vector_size(16)));	\
register unsigned char w27 asm("27") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30()	\
register unsigned char w28 asm("28") __attribute__((vector_size(16)));	\
register unsigned char w29 asm("29") __attribute__((vector_size(16)));	\
register unsigned char w30 asm("30") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31()	\
register unsigned char w31 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32()	\
register unsigned char w32 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36()	\
register unsigned char w33 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w34 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w35 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w36 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38()	\
register unsigned char w37 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w38 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36() 	\
	GEN_X_DEFINE_37_38()
#else
/* Live branch: ordinary vector variables, GCC allocates registers. */
#define	GEN_X_DEFINE_0_3()	\
	unsigned char w0 __attribute__((vector_size(16)));	\
	unsigned char w1 __attribute__((vector_size(16)));	\
	unsigned char w2 __attribute__((vector_size(16)));	\
	unsigned char w3 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5()	\
	unsigned char w4 __attribute__((vector_size(16)));	\
	unsigned char w5 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7()	\
	unsigned char w6 __attribute__((vector_size(16)));	\
	unsigned char w7 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9()	\
	unsigned char w8 __attribute__((vector_size(16)));	\
	unsigned char w9 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11()	\
	unsigned char w10 __attribute__((vector_size(16)));	\
	unsigned char w11 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15()	\
	unsigned char w12 __attribute__((vector_size(16)));	\
	unsigned char w13 __attribute__((vector_size(16)));	\
	unsigned char w14 __attribute__((vector_size(16)));	\
	unsigned char w15 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16()	\
	unsigned char w16 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17()	\
	unsigned char w17 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21()	\
	unsigned char w18 __attribute__((vector_size(16)));	\
	unsigned char w19 __attribute__((vector_size(16)));	\
	unsigned char w20 __attribute__((vector_size(16)));	\
	unsigned char w21 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23()	\
	unsigned char w22 __attribute__((vector_size(16)));	\
	unsigned char w23 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27()	\
	unsigned char w24 __attribute__((vector_size(16)));	\
	unsigned char w25 __attribute__((vector_size(16)));	\
	unsigned char w26 __attribute__((vector_size(16)));	\
	unsigned char w27 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30()	\
	unsigned char w28 __attribute__((vector_size(16)));	\
	unsigned char w29 __attribute__((vector_size(16)));	\
	unsigned char w30 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31()	\
	unsigned char w31 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32()	\
	unsigned char w32 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36()	\
	unsigned char w33 __attribute__((vector_size(16)));	\
	unsigned char w34 __attribute__((vector_size(16)));	\
	unsigned char w35 __attribute__((vector_size(16)));	\
	unsigned char w36 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38()	\
	unsigned char w37 __attribute__((vector_size(16)));	\
	unsigned char w38 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()	\
	GEN_X_DEFINE_37_38()
#endif
687