1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
4 *
5 * Template for XOR operations, instantiated in xor_simd.c.
6 *
7 * Expected preprocessor definitions:
8 *
9 * - LINE_WIDTH
10 * - XOR_FUNC_NAME(nr)
11 * - LD_INOUT_LINE(buf)
12 * - LD_AND_XOR_LINE(buf)
13 * - ST_LINE(buf)
14 */
15
16void XOR_FUNC_NAME(2)(unsigned long bytes,
17		      unsigned long * __restrict v1,
18		      const unsigned long * __restrict v2)
19{
20	unsigned long lines = bytes / LINE_WIDTH;
21
22	do {
23		__asm__ __volatile__ (
24			LD_INOUT_LINE(v1)
25			LD_AND_XOR_LINE(v2)
26			ST_LINE(v1)
27		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
28		);
29
30		v1 += LINE_WIDTH / sizeof(unsigned long);
31		v2 += LINE_WIDTH / sizeof(unsigned long);
32	} while (--lines > 0);
33}
34
35void XOR_FUNC_NAME(3)(unsigned long bytes,
36		      unsigned long * __restrict v1,
37		      const unsigned long * __restrict v2,
38		      const unsigned long * __restrict v3)
39{
40	unsigned long lines = bytes / LINE_WIDTH;
41
42	do {
43		__asm__ __volatile__ (
44			LD_INOUT_LINE(v1)
45			LD_AND_XOR_LINE(v2)
46			LD_AND_XOR_LINE(v3)
47			ST_LINE(v1)
48		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
49		);
50
51		v1 += LINE_WIDTH / sizeof(unsigned long);
52		v2 += LINE_WIDTH / sizeof(unsigned long);
53		v3 += LINE_WIDTH / sizeof(unsigned long);
54	} while (--lines > 0);
55}
56
57void XOR_FUNC_NAME(4)(unsigned long bytes,
58		      unsigned long * __restrict v1,
59		      const unsigned long * __restrict v2,
60		      const unsigned long * __restrict v3,
61		      const unsigned long * __restrict v4)
62{
63	unsigned long lines = bytes / LINE_WIDTH;
64
65	do {
66		__asm__ __volatile__ (
67			LD_INOUT_LINE(v1)
68			LD_AND_XOR_LINE(v2)
69			LD_AND_XOR_LINE(v3)
70			LD_AND_XOR_LINE(v4)
71			ST_LINE(v1)
72		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
73		: "memory"
74		);
75
76		v1 += LINE_WIDTH / sizeof(unsigned long);
77		v2 += LINE_WIDTH / sizeof(unsigned long);
78		v3 += LINE_WIDTH / sizeof(unsigned long);
79		v4 += LINE_WIDTH / sizeof(unsigned long);
80	} while (--lines > 0);
81}
82
83void XOR_FUNC_NAME(5)(unsigned long bytes,
84		      unsigned long * __restrict v1,
85		      const unsigned long * __restrict v2,
86		      const unsigned long * __restrict v3,
87		      const unsigned long * __restrict v4,
88		      const unsigned long * __restrict v5)
89{
90	unsigned long lines = bytes / LINE_WIDTH;
91
92	do {
93		__asm__ __volatile__ (
94			LD_INOUT_LINE(v1)
95			LD_AND_XOR_LINE(v2)
96			LD_AND_XOR_LINE(v3)
97			LD_AND_XOR_LINE(v4)
98			LD_AND_XOR_LINE(v5)
99			ST_LINE(v1)
100		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
101		    [v5] "r"(v5) : "memory"
102		);
103
104		v1 += LINE_WIDTH / sizeof(unsigned long);
105		v2 += LINE_WIDTH / sizeof(unsigned long);
106		v3 += LINE_WIDTH / sizeof(unsigned long);
107		v4 += LINE_WIDTH / sizeof(unsigned long);
108		v5 += LINE_WIDTH / sizeof(unsigned long);
109	} while (--lines > 0);
110}
111