1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
4 *
5 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
6 */
7
8#include <linux/linkage.h>
9#include <linux/cfi_types.h>
10#include <asm/assembler.h>
11
/*
 * Define one assembler symbol per usable vector register: `.Lv<N>.4s`
 * evaluates to N for N = 0..12.  The instruction-encoding macros in this
 * file paste `.L` in front of an operand such as `v5.4s` to look up its
 * register number, so only v0-v12 written with the `.4s` arrangement can
 * be handed to them.
 */
	.irp		regno, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\regno\().4s, \regno
	.endr
15
/*
 * sm3partw1 rd, rn, rm - emit SM3PARTW1 as a raw instruction word.
 *
 * Each operand is written like a real vector operand (e.g. `v4.4s`) and is
 * converted to its register number through the `.Lv<N>.4s` symbols.
 * Register fields: rm starts at bit 16, rn at bit 5, rd at bit 0.
 */
	.macro		sm3partw1, rd, rn, rm
	.inst		0xce60c000 | (.L\rm << 16) | (.L\rn << 5) | .L\rd
	.endm
19
/*
 * sm3partw2 rd, rn, rm - emit SM3PARTW2 as a raw instruction word.
 *
 * Same operand convention and field layout as sm3partw1 (rm at bit 16,
 * rn at bit 5, rd at bit 0); only the base opcode differs.
 */
	.macro		sm3partw2, rd, rn, rm
	.inst		0xce60c400 | (.L\rm << 16) | (.L\rn << 5) | .L\rd
	.endm
23
/*
 * sm3ss1 rd, rn, rm, ra - emit SM3SS1 as a raw instruction word.
 *
 * Four register operands, each given as `v<N>.4s` and resolved through
 * the `.Lv<N>.4s` symbols.  Register fields: rm starts at bit 16, ra at
 * bit 10, rn at bit 5, rd at bit 0.
 */
	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd
	.endm
27
/*
 * sm3tt1a rd, rn, rm, imm2 - emit SM3TT1A as a raw instruction word.
 *
 * rd/rn/rm are `v<N>.4s` operands resolved through the `.Lv<N>.4s`
 * symbols; imm2 is a plain number placed at bit 12 (the callers in this
 * file pass 0-3; the value is not range-checked here).
 * Fields: rm at bit 16, imm2 at bit 12, rn at bit 5, rd at bit 0.
 */
	.macro		sm3tt1a, rd, rn, rm, imm2
	.inst		0xce408000 | (.L\rm << 16) | ((\imm2) << 12) | (.L\rn << 5) | .L\rd
	.endm
31
/*
 * sm3tt1b rd, rn, rm, imm2 - emit SM3TT1B as a raw instruction word.
 *
 * Identical operand convention and field layout to sm3tt1a (rm at bit 16,
 * imm2 at bit 12, rn at bit 5, rd at bit 0); only the base opcode differs.
 */
	.macro		sm3tt1b, rd, rn, rm, imm2
	.inst		0xce408400 | (.L\rm << 16) | ((\imm2) << 12) | (.L\rn << 5) | .L\rd
	.endm
35
/*
 * sm3tt2a rd, rn, rm, imm2 - emit SM3TT2A as a raw instruction word.
 *
 * rd/rn/rm are `v<N>.4s` operands resolved through the `.Lv<N>.4s`
 * symbols; imm2 is a plain number placed at bit 12 (not range-checked).
 * Fields: rm at bit 16, imm2 at bit 12, rn at bit 5, rd at bit 0.
 */
	.macro		sm3tt2a, rd, rn, rm, imm2
	.inst		0xce408800 | (.L\rm << 16) | ((\imm2) << 12) | (.L\rn << 5) | .L\rd
	.endm
39
/*
 * sm3tt2b rd, rn, rm, imm2 - emit SM3TT2B as a raw instruction word.
 *
 * Identical operand convention and field layout to sm3tt2a (rm at bit 16,
 * imm2 at bit 12, rn at bit 5, rd at bit 0); only the base opcode differs.
 */
	.macro		sm3tt2b, rd, rn, rm, imm2
	.inst		0xce408c00 | (.L\rm << 16) | ((\imm2) << 12) | (.L\rn << 5) | .L\rd
	.endm
43
44	.macro		round, ab, s0, t0, t1, i
	/*
	 * One round step on the working state held in v8/v9.
	 *
	 *   ab - `a` or `b`; pasted onto sm3tt1/sm3tt2 to select the variant
	 *   s0 - vector register name (no arrangement suffix) handed to
	 *        sm3tt2 as its third operand
	 *   t0 - vector holding the per-round value consumed by sm3ss1
	 *   t1 - vector that receives the value for the following round
	 *   i  - immediate forwarded to sm3tt1/sm3tt2 (0-3 in this file)
	 *
	 * Writes v5 (scratch), \t1, v8 and v9; reads v10, which the caller
	 * must have set up beforehand.
	 */
45	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	/* \t1 = \t0 rotated left by one bit in every 32-bit lane */
46	shl		\t1\().4s, \t0\().4s, #1
47	sri		\t1\().4s, \t0\().4s, #31
	/* fold the sm3ss1 result in v5 into both halves of the state */
48	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
49	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
50	.endm
51
52	.macro		qround, ab, s0, s1, s2, s3, s4
	/*
	 * Four consecutive rounds, optionally producing one new vector.
	 *
	 *   ab      - `a` or `b`, passed through to the round macro
	 *   s0, s1  - vectors consumed by these four rounds
	 *   s2, s3  - further inputs, only read when s4 is given
	 *   s4      - optional; when present it is overwritten with a vector
	 *             derived from s0-s3 via sm3partw1/sm3partw2 (presumably
	 *             the next four message-schedule words - confirm against
	 *             the SM3 specification)
	 *
	 * Clobbers v5, v10, v11, v12 and, when s4 is given, v6 and v7.
	 * Expects the current per-round value in v11; because the four rounds
	 * alternate v11 -> v12 -> v11 -> v12 -> v11, the value for the next
	 * qround is left in v11 again.
	 */
53	.ifnb		\s4
	/*
	 * Build the operands for the s4 computation:
	 *   \s4 = last word of \s1 followed by the first three words of \s2
	 *   v6  = last word of \s0 followed by the first three words of \s1
	 *   v7  = last two words of \s2 followed by the first two of \s3
	 */
54	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
55	ext		v6.16b, \s0\().16b, \s1\().16b, #12
56	ext		v7.16b, \s2\().16b, \s3\().16b, #8
57	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
58	.endif
59
	/* v10 = \s0 ^ \s1; read by sm3tt1 in each of the four rounds */
60	eor		v10.16b, \s0\().16b, \s1\().16b
61
	/* rounds 0-3 of this group, ping-ponging the round value v11 <-> v12 */
62	round		\ab, \s0, v11, v12, 0
63	round		\ab, \s0, v12, v11, 1
64	round		\ab, \s0, v11, v12, 2
65	round		\ab, \s0, v12, v11, 3
66
67	.ifnb		\s4
	/* finish \s4 using the v6/v7 vectors formed before the rounds */
68	sm3partw2	\s4\().4s, v7.4s, v6.4s
69	.endif
70	.endm
71
72	/*
73	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
74	 *                       int blocks)
75	 */
76	.text
77SYM_TYPED_FUNC_START(sm3_ce_transform)
	/*
	 * Register use, as established by the code below:
	 *   x0        - sst; the state is loaded from and stored back to [x0]
	 *   x1        - src; advanced by 64 bytes for every block consumed
	 *   w2        - blocks; decremented once per block.  The test sits at
	 *               the bottom of the loop, so the count must be at least
	 *               1 - a count of 0 would wrap around instead of
	 *               returning immediately.
	 *   x8        - address of .Lt
	 *   v0-v4     - the 64 input bytes plus the vectors produced by qround
	 *   v5-v7, v10-v12 - scratch used by the round/qround macros
	 *   v8, v9    - working state
	 *   v13, v14  - the two words of .Lt (in the low lane)
	 *   v15, v16  - copy of the state taken at the start of each block
	 *
	 * NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64
	 * and are not preserved here; presumably callers only invoke this
	 * between kernel_neon_begin() and kernel_neon_end() - confirm.
	 */
78	/* load state */
79	ld1		{v8.4s-v9.4s}, [x0]
	/* rev64 + ext #8 reverses the order of the four words in each vector */
80	rev64		v8.4s, v8.4s
81	rev64		v9.4s, v9.4s
82	ext		v8.16b, v8.16b, v8.16b, #8
83	ext		v9.16b, v9.16b, v9.16b, #8
84
	/* s13 = first word of .Lt, s14 = second word; loaded once, reused per block */
85	adr_l		x8, .Lt
86	ldp		s13, s14, [x8]
87
88	/* load input */
890:	ld1		{v0.16b-v3.16b}, [x1], #64
90	sub		w2, w2, #1
91
	/* remember the incoming state so it can be XORed back in afterwards */
92	mov		v15.16b, v8.16b
93	mov		v16.16b, v9.16b
94
	/*
	 * Byte-swap every 32-bit input word.  CPU_LE() comes from
	 * <asm/assembler.h>; presumably it emits its argument only on
	 * little-endian builds - confirm.
	 */
95CPU_LE(	rev32		v0.16b, v0.16b		)
96CPU_LE(	rev32		v1.16b, v1.16b		)
97CPU_LE(	rev32		v2.16b, v2.16b		)
98CPU_LE(	rev32		v3.16b, v3.16b		)
99
	/* rotate v13 by one word so its lane 0 lands in lane 3 of v11 */
100	ext		v11.16b, v13.16b, v13.16b, #4
101
	/* 4 x 4 = 16 rounds with the `a` variants; each also generates a new vector */
102	qround		a, v0, v1, v2, v3, v4
103	qround		a, v1, v2, v3, v4, v0
104	qround		a, v2, v3, v4, v0, v1
105	qround		a, v3, v4, v0, v1, v2
106
	/* restart the per-round value from the second .Lt word (lane 0 -> lane 3) */
107	ext		v11.16b, v14.16b, v14.16b, #4
108
	/*
	 * 12 x 4 = 48 rounds with the `b` variants.  The last three qrounds
	 * omit s4, so they generate no further vectors.
	 */
109	qround		b, v4, v0, v1, v2, v3
110	qround		b, v0, v1, v2, v3, v4
111	qround		b, v1, v2, v3, v4, v0
112	qround		b, v2, v3, v4, v0, v1
113	qround		b, v3, v4, v0, v1, v2
114	qround		b, v4, v0, v1, v2, v3
115	qround		b, v0, v1, v2, v3, v4
116	qround		b, v1, v2, v3, v4, v0
117	qround		b, v2, v3, v4, v0, v1
118	qround		b, v3, v4
119	qround		b, v4, v0
120	qround		b, v0, v1
121
	/* combine the round output with the state saved at the top of the loop */
122	eor		v8.16b, v8.16b, v15.16b
123	eor		v9.16b, v9.16b, v16.16b
124
125	/* handled all input blocks? */
126	cbnz		w2, 0b
127
128	/* save state */
	/* undo the word reversal applied on load, then write back to sst */
129	rev64		v8.4s, v8.4s
130	rev64		v9.4s, v9.4s
131	ext		v8.16b, v8.16b, v8.16b, #8
132	ext		v9.16b, v9.16b, v9.16b, #8
133	st1		{v8.4s-v9.4s}, [x0]
134	ret
135SYM_FUNC_END(sm3_ce_transform)
136
/*
 * Two 32-bit constants read as a pair by sm3_ce_transform (ldp s13, s14):
 * the first seeds the per-round value for the `a` quarter-rounds, the
 * second re-seeds it for the `b` quarter-rounds.  Presumably these are the
 * SM3 T_j constants, the second one pre-rotated for the round at which it
 * takes over - confirm against the SM3 specification.
 */
	.section	".rodata", "a"
	.p2align	3
.Lt:
	.word		0x79cc4519
	.word		0x9d8a7a87
140