// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */

#include "xor_simd.h"

/*
 * Process one cache line (64 bytes) per loop iteration. This assumes that
 * all future popular LoongArch cores have performance characteristics
 * similar to those of the current models.
 */
#define LINE_WIDTH 64

#ifdef CONFIG_CPU_HAS_LSX

/*
 * LSX flavour of the building blocks consumed by xor_template.c.
 *
 * Every macro below expands to string-literal fragments of assembly text;
 * adjacent fragments concatenate into a single string, one instruction per
 * "\n\t"-terminated line. "%[base]" is a named-operand reference, so the
 * fragments are presumably pasted into inline asm statements inside
 * xor_template.c (not visible from this file -- confirm there).
 */

/* Load vector register $vr<reg> from operand <base> plus <offset> bytes. */
#define LD(reg, base, offset)	\
	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
/* Store vector register $vr<reg> to operand <base> plus <offset> bytes. */
#define ST(reg, base, offset)	\
	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
/* $vr<dj> ^= $vr<k>; the destination doubles as the first source. */
#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"

/*
 * Load one 64-byte line at <base> into $vr0-$vr3, 16 bytes per register
 * (offsets 0, 16, 32 and 48).
 */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 16)		\
	LD(2, base, 32)		\
	LD(3, base, 48)

/*
 * Load one line at <base> into the scratch registers $vr4-$vr7, then XOR
 * it into $vr0-$vr3. All four loads are issued before the first XOR.
 */
#define LD_AND_XOR_LINE(base)	\
	LD(4, base, 0)		\
	LD(5, base, 16)		\
	LD(6, base, 32)		\
	LD(7, base, 48)		\
	XOR(0, 4)		\
	XOR(1, 5)		\
	XOR(2, 6)		\
	XOR(3, 7)

/* Store $vr0-$vr3 as one line at <base>. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 16)		\
	ST(2, base, 32)		\
	ST(3, base, 48)

/* Symbol name handed to the template: <nr> becomes __xor_lsx_<nr>. */
#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
/* Pull in the template with the LSX definitions above in effect. */
#include "xor_template.c"

/*
 * Drop the LSX definitions so they do not leak past this section; the
 * LASX section redefines the same macro names before including the
 * template again.
 */
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME

#endif /* CONFIG_CPU_HAS_LSX */

#ifdef CONFIG_CPU_HAS_LASX

/*
 * LASX flavour of the building blocks consumed by xor_template.c.
 *
 * Every macro below expands to string-literal fragments of assembly text;
 * adjacent fragments concatenate into a single string, one instruction per
 * "\n\t"-terminated line. "%[base]" is a named-operand reference, so the
 * fragments are presumably pasted into inline asm statements inside
 * xor_template.c (not visible from this file -- confirm there).
 */

/* Load vector register $xr<reg> from operand <base> plus <offset> bytes. */
#define LD(reg, base, offset)	\
	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
/* Store vector register $xr<reg> to operand <base> plus <offset> bytes. */
#define ST(reg, base, offset)	\
	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
/* $xr<dj> ^= $xr<k>; the destination doubles as the first source. */
#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"

/*
 * Load one 64-byte line at <base> into $xr0-$xr1, 32 bytes per register
 * (offsets 0 and 32).
 */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 32)

/*
 * Load one line at <base> into the scratch registers $xr2-$xr3, then XOR
 * it into $xr0-$xr1. Both loads are issued before the first XOR.
 */
#define LD_AND_XOR_LINE(base)	\
	LD(2, base, 0)		\
	LD(3, base, 32)		\
	XOR(0, 2)		\
	XOR(1, 3)

/* Store $xr0-$xr1 as one line at <base>. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 32)

/* Symbol name handed to the template: <nr> becomes __xor_lasx_<nr>. */
#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
/* Pull in the template with the LASX definitions above in effect. */
#include "xor_template.c"

/* Drop the LASX definitions so they do not leak past this section. */
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME

#endif /* CONFIG_CPU_HAS_LASX */