1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * LoongArch SIMD XOR operations 4 * 5 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 6 */ 7 8#include "xor_simd.h" 9 10/* 11 * Process one cache line (64 bytes) per loop. This is assuming all future 12 * popular LoongArch cores are similar performance-characteristics-wise to the 13 * current models. 14 */ 15#define LINE_WIDTH 64 16 17#ifdef CONFIG_CPU_HAS_LSX 18 19#define LD(reg, base, offset) \ 20 "vld $vr" #reg ", %[" #base "], " #offset "\n\t" 21#define ST(reg, base, offset) \ 22 "vst $vr" #reg ", %[" #base "], " #offset "\n\t" 23#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" 24 25#define LD_INOUT_LINE(base) \ 26 LD(0, base, 0) \ 27 LD(1, base, 16) \ 28 LD(2, base, 32) \ 29 LD(3, base, 48) 30 31#define LD_AND_XOR_LINE(base) \ 32 LD(4, base, 0) \ 33 LD(5, base, 16) \ 34 LD(6, base, 32) \ 35 LD(7, base, 48) \ 36 XOR(0, 4) \ 37 XOR(1, 5) \ 38 XOR(2, 6) \ 39 XOR(3, 7) 40 41#define ST_LINE(base) \ 42 ST(0, base, 0) \ 43 ST(1, base, 16) \ 44 ST(2, base, 32) \ 45 ST(3, base, 48) 46 47#define XOR_FUNC_NAME(nr) __xor_lsx_##nr 48#include "xor_template.c" 49 50#undef LD 51#undef ST 52#undef XOR 53#undef LD_INOUT_LINE 54#undef LD_AND_XOR_LINE 55#undef ST_LINE 56#undef XOR_FUNC_NAME 57 58#endif /* CONFIG_CPU_HAS_LSX */ 59 60#ifdef CONFIG_CPU_HAS_LASX 61 62#define LD(reg, base, offset) \ 63 "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" 64#define ST(reg, base, offset) \ 65 "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" 66#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" 67 68#define LD_INOUT_LINE(base) \ 69 LD(0, base, 0) \ 70 LD(1, base, 32) 71 72#define LD_AND_XOR_LINE(base) \ 73 LD(2, base, 0) \ 74 LD(3, base, 32) \ 75 XOR(0, 2) \ 76 XOR(1, 3) 77 78#define ST_LINE(base) \ 79 ST(0, base, 0) \ 80 ST(1, base, 32) 81 82#define XOR_FUNC_NAME(nr) __xor_lasx_##nr 83#include "xor_template.c" 84 85#undef LD 86#undef ST 87#undef XOR 88#undef LD_INOUT_LINE 89#undef LD_AND_XOR_LINE 90#undef ST_LINE 91#undef XOR_FUNC_NAME 92 93#endif /* CONFIG_CPU_HAS_LASX */ 94