1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves
25 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
26 *
27 * This is converted assembly: SSE4.1 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
29 *
30 * Should work on FreeBSD, Linux and macOS
31 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
32 */
33
34#if defined(__aarch64__)
35
36/* make gcc <= 9 happy */
37#if !defined(LD_VERSION) || LD_VERSION >= 233010000
38#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
39#else
40#define CFI_NEGATE_RA_STATE
41#endif
42
43	.text
44	.section	.note.gnu.property,"a",@note
45	.p2align	3
46	.word	4
47	.word	16
48	.word	5
49	.asciz	"GNU"
50	.word	3221225472
51	.word	4
52	.word	3
53	.word	0
54.Lsec_end0:
55	.text
56	.globl	zfs_blake3_compress_in_place_sse41
57	.p2align	2
58	.type	zfs_blake3_compress_in_place_sse41,@function
59zfs_blake3_compress_in_place_sse41:
60	.cfi_startproc
61	hint	#25
62	CFI_NEGATE_RA_STATE
63	sub	sp, sp, #96
64	stp	x29, x30, [sp, #64]
65	add	x29, sp, #64
66	str	x19, [sp, #80]
67	.cfi_def_cfa w29, 32
68	.cfi_offset w19, -16
69	.cfi_offset w30, -24
70	.cfi_offset w29, -32
71	mov	x19, x0
72	mov	w5, w4
73	mov	x4, x3
74	mov	w3, w2
75	mov	x2, x1
76	mov	x0, sp
77	mov	x1, x19
78	bl	compress_pre
79	ldp	q0, q1, [sp]
80	ldp	q2, q3, [sp, #32]
81	eor	v0.16b, v2.16b, v0.16b
82	eor	v1.16b, v3.16b, v1.16b
83	ldp	x29, x30, [sp, #64]
84	stp	q0, q1, [x19]
85	ldr	x19, [sp, #80]
86	add	sp, sp, #96
87	hint	#29
88	ret
89.Lfunc_end0:
90	.size	zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
91	.cfi_endproc
92
93	.section	.rodata.cst16,"aM",@progbits,16
94	.p2align	4
95.LCPI1_0:
96	.xword	-4942790177982912921
97	.xword	-6534734903820487822
98.LCPI1_1:
99	.byte	2
100	.byte	3
101	.byte	0
102	.byte	1
103	.byte	6
104	.byte	7
105	.byte	4
106	.byte	5
107	.byte	10
108	.byte	11
109	.byte	8
110	.byte	9
111	.byte	14
112	.byte	15
113	.byte	12
114	.byte	13
115.LCPI1_2:
116	.byte	1
117	.byte	2
118	.byte	3
119	.byte	0
120	.byte	5
121	.byte	6
122	.byte	7
123	.byte	4
124	.byte	9
125	.byte	10
126	.byte	11
127	.byte	8
128	.byte	13
129	.byte	14
130	.byte	15
131	.byte	12
132	.text
133	.p2align	2
134	.type	compress_pre,@function
135compress_pre:
136	.cfi_startproc
137	hint	#34
138	fmov	s1, w3
139	movi	d0, #0x0000ff000000ff
140	ldr	q2, [x1]
141	adrp	x8, .LCPI1_0
142	mov	v1.s[1], w5
143	str	q2, [x0]
144	ldr	q4, [x8, :lo12:.LCPI1_0]
145	ldr	q5, [x1, #16]
146	adrp	x8, .LCPI1_1
147	and	v0.8b, v1.8b, v0.8b
148	fmov	d1, x4
149	stp	q5, q4, [x0, #16]
150	mov	v1.d[1], v0.d[0]
151	str	q1, [x0, #48]
152	ldp	q6, q7, [x2]
153	uzp1	v3.4s, v6.4s, v7.4s
154	add	v0.4s, v2.4s, v3.4s
155	uzp2	v2.4s, v6.4s, v7.4s
156	add	v16.4s, v0.4s, v5.4s
157	ldr	q0, [x8, :lo12:.LCPI1_1]
158	adrp	x8, .LCPI1_2
159	eor	v1.16b, v16.16b, v1.16b
160	add	v7.4s, v16.4s, v2.4s
161	tbl	v1.16b, { v1.16b }, v0.16b
162	add	v4.4s, v1.4s, v4.4s
163	eor	v5.16b, v4.16b, v5.16b
164	ushr	v6.4s, v5.4s, #12
165	shl	v5.4s, v5.4s, #20
166	orr	v5.16b, v5.16b, v6.16b
167	add	v6.4s, v7.4s, v5.4s
168	eor	v7.16b, v1.16b, v6.16b
169	ldr	q1, [x8, :lo12:.LCPI1_2]
170	add	x8, x2, #32
171	tbl	v7.16b, { v7.16b }, v1.16b
172	ld2	{ v16.4s, v17.4s }, [x8]
173	add	v4.4s, v4.4s, v7.4s
174	ext	v7.16b, v7.16b, v7.16b, #8
175	add	v6.4s, v6.4s, v16.4s
176	eor	v5.16b, v4.16b, v5.16b
177	ext	v4.16b, v4.16b, v4.16b, #4
178	ext	v16.16b, v16.16b, v16.16b, #12
179	ext	v6.16b, v6.16b, v6.16b, #12
180	ushr	v18.4s, v5.4s, #7
181	shl	v5.4s, v5.4s, #25
182	orr	v5.16b, v5.16b, v18.16b
183	ext	v18.16b, v17.16b, v17.16b, #12
184	add	v6.4s, v6.4s, v5.4s
185	mov	v17.16b, v18.16b
186	eor	v7.16b, v7.16b, v6.16b
187	add	v6.4s, v6.4s, v18.4s
188	mov	v17.s[1], v16.s[2]
189	tbl	v7.16b, { v7.16b }, v0.16b
190	add	v4.4s, v4.4s, v7.4s
191	eor	v5.16b, v4.16b, v5.16b
192	ushr	v19.4s, v5.4s, #12
193	shl	v5.4s, v5.4s, #20
194	orr	v5.16b, v5.16b, v19.16b
195	uzp1	v19.4s, v3.4s, v3.4s
196	add	v6.4s, v6.4s, v5.4s
197	ext	v19.16b, v19.16b, v3.16b, #8
198	eor	v7.16b, v7.16b, v6.16b
199	uzp2	v19.4s, v19.4s, v2.4s
200	tbl	v7.16b, { v7.16b }, v1.16b
201	add	v6.4s, v6.4s, v19.4s
202	add	v4.4s, v4.4s, v7.4s
203	ext	v6.16b, v6.16b, v6.16b, #4
204	ext	v7.16b, v7.16b, v7.16b, #8
205	eor	v5.16b, v4.16b, v5.16b
206	ext	v4.16b, v4.16b, v4.16b, #12
207	ushr	v20.4s, v5.4s, #7
208	shl	v5.4s, v5.4s, #25
209	orr	v5.16b, v5.16b, v20.16b
210	ext	v20.16b, v3.16b, v3.16b, #12
211	add	v6.4s, v6.4s, v5.4s
212	ext	v3.16b, v3.16b, v20.16b, #12
213	eor	v7.16b, v7.16b, v6.16b
214	rev64	v3.4s, v3.4s
215	tbl	v7.16b, { v7.16b }, v0.16b
216	trn2	v3.4s, v3.4s, v17.4s
217	add	v4.4s, v4.4s, v7.4s
218	add	v6.4s, v6.4s, v3.4s
219	eor	v5.16b, v4.16b, v5.16b
220	ushr	v17.4s, v5.4s, #12
221	shl	v5.4s, v5.4s, #20
222	orr	v5.16b, v5.16b, v17.16b
223	zip1	v17.2d, v18.2d, v2.2d
224	zip2	v2.4s, v2.4s, v18.4s
225	add	v6.4s, v6.4s, v5.4s
226	mov	v17.s[3], v16.s[3]
227	zip1	v18.4s, v2.4s, v16.4s
228	zip1	v2.4s, v16.4s, v2.4s
229	eor	v7.16b, v7.16b, v6.16b
230	ext	v6.16b, v6.16b, v6.16b, #12
231	ext	v16.16b, v2.16b, v18.16b, #8
232	tbl	v7.16b, { v7.16b }, v1.16b
233	add	v20.4s, v4.4s, v7.4s
234	ext	v4.16b, v17.16b, v17.16b, #12
235	ext	v7.16b, v7.16b, v7.16b, #8
236	eor	v5.16b, v20.16b, v5.16b
237	uzp1	v4.4s, v17.4s, v4.4s
238	ushr	v17.4s, v5.4s, #7
239	shl	v5.4s, v5.4s, #25
240	add	v6.4s, v6.4s, v4.4s
241	orr	v5.16b, v5.16b, v17.16b
242	ext	v17.16b, v20.16b, v20.16b, #4
243	add	v6.4s, v6.4s, v5.4s
244	eor	v7.16b, v7.16b, v6.16b
245	add	v6.4s, v6.4s, v16.4s
246	tbl	v7.16b, { v7.16b }, v0.16b
247	add	v17.4s, v17.4s, v7.4s
248	eor	v5.16b, v17.16b, v5.16b
249	ushr	v2.4s, v5.4s, #12
250	shl	v5.4s, v5.4s, #20
251	orr	v2.16b, v5.16b, v2.16b
252	add	v5.4s, v6.4s, v2.4s
253	ext	v6.16b, v19.16b, v19.16b, #4
254	eor	v7.16b, v7.16b, v5.16b
255	uzp1	v18.4s, v6.4s, v6.4s
256	tbl	v7.16b, { v7.16b }, v1.16b
257	ext	v18.16b, v18.16b, v6.16b, #8
258	add	v17.4s, v17.4s, v7.4s
259	uzp2	v18.4s, v18.4s, v3.4s
260	ext	v7.16b, v7.16b, v7.16b, #8
261	eor	v2.16b, v17.16b, v2.16b
262	add	v5.4s, v5.4s, v18.4s
263	ext	v17.16b, v17.16b, v17.16b, #12
264	ushr	v19.4s, v2.4s, #7
265	shl	v2.4s, v2.4s, #25
266	ext	v5.16b, v5.16b, v5.16b, #4
267	orr	v2.16b, v2.16b, v19.16b
268	ext	v19.16b, v6.16b, v6.16b, #12
269	add	v5.4s, v5.4s, v2.4s
270	ext	v6.16b, v6.16b, v19.16b, #12
271	mov	v19.16b, v16.16b
272	eor	v7.16b, v7.16b, v5.16b
273	rev64	v6.4s, v6.4s
274	mov	v19.s[1], v4.s[2]
275	tbl	v7.16b, { v7.16b }, v0.16b
276	add	v17.4s, v17.4s, v7.4s
277	eor	v20.16b, v17.16b, v2.16b
278	trn2	v2.4s, v6.4s, v19.4s
279	ushr	v6.4s, v20.4s, #12
280	shl	v19.4s, v20.4s, #20
281	add	v5.4s, v5.4s, v2.4s
282	orr	v6.16b, v19.16b, v6.16b
283	add	v19.4s, v5.4s, v6.4s
284	eor	v5.16b, v7.16b, v19.16b
285	zip1	v7.2d, v16.2d, v3.2d
286	zip2	v3.4s, v3.4s, v16.4s
287	tbl	v20.16b, { v5.16b }, v1.16b
288	mov	v7.s[3], v4.s[3]
289	add	v17.4s, v17.4s, v20.4s
290	ext	v5.16b, v7.16b, v7.16b, #12
291	eor	v6.16b, v17.16b, v6.16b
292	uzp1	v5.4s, v7.4s, v5.4s
293	ext	v7.16b, v19.16b, v19.16b, #12
294	ext	v17.16b, v17.16b, v17.16b, #4
295	ushr	v19.4s, v6.4s, #7
296	shl	v6.4s, v6.4s, #25
297	add	v7.4s, v7.4s, v5.4s
298	orr	v6.16b, v6.16b, v19.16b
299	ext	v19.16b, v20.16b, v20.16b, #8
300	add	v7.4s, v7.4s, v6.4s
301	eor	v19.16b, v19.16b, v7.16b
302	tbl	v19.16b, { v19.16b }, v0.16b
303	add	v16.4s, v17.4s, v19.4s
304	zip1	v17.4s, v3.4s, v4.4s
305	zip1	v3.4s, v4.4s, v3.4s
306	eor	v4.16b, v16.16b, v6.16b
307	ext	v17.16b, v3.16b, v17.16b, #8
308	ushr	v3.4s, v4.4s, #12
309	shl	v4.4s, v4.4s, #20
310	add	v6.4s, v7.4s, v17.4s
311	orr	v3.16b, v4.16b, v3.16b
312	add	v4.4s, v6.4s, v3.4s
313	ext	v6.16b, v18.16b, v18.16b, #4
314	eor	v7.16b, v19.16b, v4.16b
315	uzp1	v18.4s, v6.4s, v6.4s
316	tbl	v7.16b, { v7.16b }, v1.16b
317	ext	v18.16b, v18.16b, v6.16b, #8
318	add	v16.4s, v16.4s, v7.4s
319	uzp2	v18.4s, v18.4s, v2.4s
320	ext	v7.16b, v7.16b, v7.16b, #8
321	eor	v3.16b, v16.16b, v3.16b
322	add	v4.4s, v4.4s, v18.4s
323	ext	v16.16b, v16.16b, v16.16b, #12
324	ushr	v19.4s, v3.4s, #7
325	shl	v3.4s, v3.4s, #25
326	ext	v4.16b, v4.16b, v4.16b, #4
327	orr	v3.16b, v3.16b, v19.16b
328	ext	v19.16b, v6.16b, v6.16b, #12
329	add	v4.4s, v4.4s, v3.4s
330	ext	v6.16b, v6.16b, v19.16b, #12
331	mov	v19.16b, v17.16b
332	eor	v7.16b, v7.16b, v4.16b
333	rev64	v6.4s, v6.4s
334	mov	v19.s[1], v5.s[2]
335	tbl	v7.16b, { v7.16b }, v0.16b
336	add	v16.4s, v16.4s, v7.4s
337	eor	v20.16b, v16.16b, v3.16b
338	trn2	v3.4s, v6.4s, v19.4s
339	ushr	v6.4s, v20.4s, #12
340	shl	v19.4s, v20.4s, #20
341	add	v4.4s, v4.4s, v3.4s
342	orr	v6.16b, v19.16b, v6.16b
343	zip1	v19.2d, v17.2d, v2.2d
344	zip2	v2.4s, v2.4s, v17.4s
345	add	v4.4s, v4.4s, v6.4s
346	mov	v19.s[3], v5.s[3]
347	zip1	v17.4s, v2.4s, v5.4s
348	zip1	v2.4s, v5.4s, v2.4s
349	eor	v7.16b, v7.16b, v4.16b
350	ext	v20.16b, v19.16b, v19.16b, #12
351	ext	v4.16b, v4.16b, v4.16b, #12
352	ext	v2.16b, v2.16b, v17.16b, #8
353	tbl	v7.16b, { v7.16b }, v1.16b
354	add	v16.4s, v16.4s, v7.4s
355	ext	v7.16b, v7.16b, v7.16b, #8
356	eor	v21.16b, v16.16b, v6.16b
357	uzp1	v6.4s, v19.4s, v20.4s
358	ext	v16.16b, v16.16b, v16.16b, #4
359	ushr	v19.4s, v21.4s, #7
360	shl	v20.4s, v21.4s, #25
361	add	v4.4s, v4.4s, v6.4s
362	orr	v19.16b, v20.16b, v19.16b
363	add	v4.4s, v4.4s, v19.4s
364	eor	v7.16b, v7.16b, v4.16b
365	add	v4.4s, v4.4s, v2.4s
366	tbl	v7.16b, { v7.16b }, v0.16b
367	add	v16.4s, v16.4s, v7.4s
368	eor	v5.16b, v16.16b, v19.16b
369	ushr	v17.4s, v5.4s, #12
370	shl	v5.4s, v5.4s, #20
371	orr	v5.16b, v5.16b, v17.16b
372	ext	v17.16b, v18.16b, v18.16b, #4
373	add	v4.4s, v4.4s, v5.4s
374	uzp1	v18.4s, v17.4s, v17.4s
375	eor	v7.16b, v7.16b, v4.16b
376	ext	v18.16b, v18.16b, v17.16b, #8
377	tbl	v7.16b, { v7.16b }, v1.16b
378	uzp2	v18.4s, v18.4s, v3.4s
379	add	v16.4s, v16.4s, v7.4s
380	add	v4.4s, v4.4s, v18.4s
381	ext	v7.16b, v7.16b, v7.16b, #8
382	eor	v5.16b, v16.16b, v5.16b
383	ext	v4.16b, v4.16b, v4.16b, #4
384	ext	v16.16b, v16.16b, v16.16b, #12
385	ushr	v19.4s, v5.4s, #7
386	shl	v5.4s, v5.4s, #25
387	orr	v5.16b, v5.16b, v19.16b
388	add	v19.4s, v4.4s, v5.4s
389	eor	v4.16b, v7.16b, v19.16b
390	ext	v7.16b, v17.16b, v17.16b, #12
391	tbl	v20.16b, { v4.16b }, v0.16b
392	ext	v4.16b, v17.16b, v7.16b, #12
393	mov	v7.16b, v2.16b
394	add	v16.4s, v16.4s, v20.4s
395	rev64	v4.4s, v4.4s
396	mov	v7.s[1], v6.s[2]
397	eor	v5.16b, v16.16b, v5.16b
398	trn2	v4.4s, v4.4s, v7.4s
399	ushr	v7.4s, v5.4s, #12
400	shl	v5.4s, v5.4s, #20
401	add	v17.4s, v19.4s, v4.4s
402	zip1	v19.2d, v2.2d, v3.2d
403	zip2	v2.4s, v3.4s, v2.4s
404	orr	v5.16b, v5.16b, v7.16b
405	mov	v19.s[3], v6.s[3]
406	add	v7.4s, v17.4s, v5.4s
407	eor	v17.16b, v20.16b, v7.16b
408	ext	v20.16b, v19.16b, v19.16b, #12
409	ext	v7.16b, v7.16b, v7.16b, #12
410	tbl	v17.16b, { v17.16b }, v1.16b
411	add	v16.4s, v16.4s, v17.4s
412	ext	v17.16b, v17.16b, v17.16b, #8
413	eor	v21.16b, v16.16b, v5.16b
414	uzp1	v5.4s, v19.4s, v20.4s
415	ext	v16.16b, v16.16b, v16.16b, #4
416	ushr	v19.4s, v21.4s, #7
417	shl	v20.4s, v21.4s, #25
418	add	v7.4s, v7.4s, v5.4s
419	orr	v19.16b, v20.16b, v19.16b
420	add	v7.4s, v7.4s, v19.4s
421	eor	v17.16b, v17.16b, v7.16b
422	tbl	v17.16b, { v17.16b }, v0.16b
423	add	v3.4s, v16.4s, v17.4s
424	zip1	v16.4s, v2.4s, v6.4s
425	zip1	v2.4s, v6.4s, v2.4s
426	eor	v6.16b, v3.16b, v19.16b
427	ext	v16.16b, v2.16b, v16.16b, #8
428	ushr	v2.4s, v6.4s, #12
429	shl	v6.4s, v6.4s, #20
430	add	v7.4s, v7.4s, v16.4s
431	orr	v2.16b, v6.16b, v2.16b
432	add	v6.4s, v7.4s, v2.4s
433	ext	v7.16b, v18.16b, v18.16b, #4
434	eor	v17.16b, v17.16b, v6.16b
435	uzp1	v18.4s, v7.4s, v7.4s
436	tbl	v17.16b, { v17.16b }, v1.16b
437	ext	v18.16b, v18.16b, v7.16b, #8
438	add	v3.4s, v3.4s, v17.4s
439	uzp2	v18.4s, v18.4s, v4.4s
440	eor	v2.16b, v3.16b, v2.16b
441	add	v6.4s, v6.4s, v18.4s
442	ext	v3.16b, v3.16b, v3.16b, #12
443	ext	v18.16b, v18.16b, v18.16b, #4
444	ushr	v19.4s, v2.4s, #7
445	shl	v2.4s, v2.4s, #25
446	ext	v6.16b, v6.16b, v6.16b, #4
447	orr	v19.16b, v2.16b, v19.16b
448	ext	v2.16b, v17.16b, v17.16b, #8
449	ext	v17.16b, v7.16b, v7.16b, #12
450	add	v6.4s, v6.4s, v19.4s
451	eor	v2.16b, v2.16b, v6.16b
452	tbl	v20.16b, { v2.16b }, v0.16b
453	ext	v2.16b, v7.16b, v17.16b, #12
454	mov	v7.16b, v16.16b
455	add	v17.4s, v3.4s, v20.4s
456	rev64	v3.4s, v2.4s
457	mov	v7.s[1], v5.s[2]
458	eor	v19.16b, v17.16b, v19.16b
459	trn2	v3.4s, v3.4s, v7.4s
460	ushr	v21.4s, v19.4s, #12
461	shl	v19.4s, v19.4s, #20
462	add	v6.4s, v6.4s, v3.4s
463	orr	v19.16b, v19.16b, v21.16b
464	add	v21.4s, v6.4s, v19.4s
465	eor	v6.16b, v20.16b, v21.16b
466	zip1	v20.2d, v16.2d, v4.2d
467	zip2	v4.4s, v4.4s, v16.4s
468	tbl	v22.16b, { v6.16b }, v1.16b
469	mov	v20.s[3], v5.s[3]
470	add	v17.4s, v17.4s, v22.4s
471	ext	v6.16b, v20.16b, v20.16b, #12
472	eor	v19.16b, v17.16b, v19.16b
473	uzp1	v6.4s, v20.4s, v6.4s
474	ext	v20.16b, v21.16b, v21.16b, #12
475	ext	v17.16b, v17.16b, v17.16b, #4
476	ushr	v21.4s, v19.4s, #7
477	shl	v19.4s, v19.4s, #25
478	add	v20.4s, v20.4s, v6.4s
479	orr	v19.16b, v19.16b, v21.16b
480	ext	v21.16b, v22.16b, v22.16b, #8
481	add	v20.4s, v20.4s, v19.4s
482	eor	v21.16b, v21.16b, v20.16b
483	tbl	v21.16b, { v21.16b }, v0.16b
484	add	v16.4s, v17.4s, v21.4s
485	zip1	v17.4s, v4.4s, v5.4s
486	zip1	v4.4s, v5.4s, v4.4s
487	eor	v5.16b, v16.16b, v19.16b
488	ext	v4.16b, v4.16b, v17.16b, #8
489	ushr	v17.4s, v5.4s, #12
490	shl	v5.4s, v5.4s, #20
491	add	v19.4s, v20.4s, v4.4s
492	ext	v20.16b, v18.16b, v18.16b, #8
493	zip1	v3.2d, v4.2d, v3.2d
494	orr	v5.16b, v5.16b, v17.16b
495	zip2	v2.4s, v2.4s, v4.4s
496	uzp2	v7.4s, v20.4s, v7.4s
497	mov	v3.s[3], v6.s[3]
498	add	v17.4s, v19.4s, v5.4s
499	ext	v7.16b, v7.16b, v20.16b, #4
500	eor	v19.16b, v21.16b, v17.16b
501	ext	v17.16b, v17.16b, v17.16b, #4
502	tbl	v19.16b, { v19.16b }, v1.16b
503	add	v7.4s, v17.4s, v7.4s
504	add	v16.4s, v16.4s, v19.4s
505	ext	v17.16b, v19.16b, v19.16b, #8
506	ext	v19.16b, v18.16b, v18.16b, #12
507	eor	v5.16b, v16.16b, v5.16b
508	ext	v16.16b, v16.16b, v16.16b, #12
509	ext	v18.16b, v18.16b, v19.16b, #12
510	mov	v19.16b, v4.16b
511	ushr	v20.4s, v5.4s, #7
512	shl	v5.4s, v5.4s, #25
513	rev64	v18.4s, v18.4s
514	mov	v19.s[1], v6.s[2]
515	orr	v5.16b, v5.16b, v20.16b
516	trn2	v18.4s, v18.4s, v19.4s
517	add	v7.4s, v5.4s, v7.4s
518	eor	v17.16b, v17.16b, v7.16b
519	add	v7.4s, v7.4s, v18.4s
520	ext	v18.16b, v3.16b, v3.16b, #12
521	tbl	v17.16b, { v17.16b }, v0.16b
522	uzp1	v3.4s, v3.4s, v18.4s
523	add	v16.4s, v16.4s, v17.4s
524	eor	v5.16b, v16.16b, v5.16b
525	ushr	v19.4s, v5.4s, #12
526	shl	v5.4s, v5.4s, #20
527	orr	v5.16b, v5.16b, v19.16b
528	add	v7.4s, v7.4s, v5.4s
529	eor	v17.16b, v17.16b, v7.16b
530	ext	v7.16b, v7.16b, v7.16b, #12
531	tbl	v17.16b, { v17.16b }, v1.16b
532	add	v3.4s, v7.4s, v3.4s
533	add	v16.4s, v16.4s, v17.4s
534	ext	v7.16b, v17.16b, v17.16b, #8
535	eor	v5.16b, v16.16b, v5.16b
536	ext	v16.16b, v16.16b, v16.16b, #4
537	ushr	v18.4s, v5.4s, #7
538	shl	v5.4s, v5.4s, #25
539	orr	v5.16b, v5.16b, v18.16b
540	add	v3.4s, v3.4s, v5.4s
541	eor	v7.16b, v7.16b, v3.16b
542	tbl	v0.16b, { v7.16b }, v0.16b
543	zip1	v7.4s, v2.4s, v6.4s
544	zip1	v2.4s, v6.4s, v2.4s
545	add	v4.4s, v16.4s, v0.4s
546	ext	v2.16b, v2.16b, v7.16b, #8
547	eor	v5.16b, v4.16b, v5.16b
548	add	v2.4s, v3.4s, v2.4s
549	ushr	v6.4s, v5.4s, #12
550	shl	v5.4s, v5.4s, #20
551	orr	v3.16b, v5.16b, v6.16b
552	add	v2.4s, v2.4s, v3.4s
553	eor	v0.16b, v0.16b, v2.16b
554	ext	v2.16b, v2.16b, v2.16b, #4
555	tbl	v0.16b, { v0.16b }, v1.16b
556	add	v1.4s, v4.4s, v0.4s
557	ext	v0.16b, v0.16b, v0.16b, #8
558	eor	v3.16b, v1.16b, v3.16b
559	ext	v1.16b, v1.16b, v1.16b, #12
560	ushr	v4.4s, v3.4s, #7
561	shl	v3.4s, v3.4s, #25
562	stp	q1, q0, [x0, #32]
563	orr	v3.16b, v3.16b, v4.16b
564	stp	q2, q3, [x0]
565	ret
566.Lfunc_end1:
567	.size	compress_pre, .Lfunc_end1-compress_pre
568	.cfi_endproc
569
570	.globl	zfs_blake3_compress_xof_sse41
571	.p2align	2
572	.type	zfs_blake3_compress_xof_sse41,@function
573zfs_blake3_compress_xof_sse41:
574	.cfi_startproc
575	hint	#25
576	CFI_NEGATE_RA_STATE
577	sub	sp, sp, #96
578	stp	x29, x30, [sp, #64]
579	add	x29, sp, #64
580	stp	x20, x19, [sp, #80]
581	.cfi_def_cfa w29, 32
582	.cfi_offset w19, -8
583	.cfi_offset w20, -16
584	.cfi_offset w30, -24
585	.cfi_offset w29, -32
586	mov	x20, x0
587	mov	x19, x5
588	mov	w5, w4
589	mov	x4, x3
590	mov	w3, w2
591	mov	x2, x1
592	mov	x0, sp
593	mov	x1, x20
594	bl	compress_pre
595	ldp	q0, q1, [sp]
596	ldp	q2, q3, [sp, #32]
597	eor	v0.16b, v2.16b, v0.16b
598	eor	v1.16b, v3.16b, v1.16b
599	ldp	x29, x30, [sp, #64]
600	stp	q0, q1, [x19]
601	ldr	q0, [x20]
602	eor	v0.16b, v0.16b, v2.16b
603	str	q0, [x19, #32]
604	ldr	q0, [x20, #16]
605	eor	v0.16b, v0.16b, v3.16b
606	str	q0, [x19, #48]
607	ldp	x20, x19, [sp, #80]
608	add	sp, sp, #96
609	hint	#29
610	ret
611.Lfunc_end2:
612	.size	zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
613	.cfi_endproc
614
615	.section	.rodata.cst16,"aM",@progbits,16
616	.p2align	4
617.LCPI3_0:
618	.word	0
619	.word	1
620	.word	2
621	.word	3
622.LCPI3_1:
623	.byte	2
624	.byte	3
625	.byte	0
626	.byte	1
627	.byte	6
628	.byte	7
629	.byte	4
630	.byte	5
631	.byte	10
632	.byte	11
633	.byte	8
634	.byte	9
635	.byte	14
636	.byte	15
637	.byte	12
638	.byte	13
639.LCPI3_2:
640	.byte	1
641	.byte	2
642	.byte	3
643	.byte	0
644	.byte	5
645	.byte	6
646	.byte	7
647	.byte	4
648	.byte	9
649	.byte	10
650	.byte	11
651	.byte	8
652	.byte	13
653	.byte	14
654	.byte	15
655	.byte	12
656.LCPI3_3:
657	.word	1779033703
658	.word	3144134277
659	.word	1013904242
660	.word	2773480762
661	.text
662	.globl	zfs_blake3_hash_many_sse41
663	.p2align	2
664	.type	zfs_blake3_hash_many_sse41,@function
665zfs_blake3_hash_many_sse41:
666	.cfi_startproc
667	hint	#34
668	stp	d15, d14, [sp, #-144]!
669	stp	d13, d12, [sp, #16]
670	stp	d11, d10, [sp, #32]
671	stp	d9, d8, [sp, #48]
672	stp	x29, x27, [sp, #64]
673	stp	x26, x25, [sp, #80]
674	stp	x24, x23, [sp, #96]
675	stp	x22, x21, [sp, #112]
676	stp	x20, x19, [sp, #128]
677	sub	sp, sp, #368
678	.cfi_def_cfa_offset 512
679	.cfi_offset w19, -8
680	.cfi_offset w20, -16
681	.cfi_offset w21, -24
682	.cfi_offset w22, -32
683	.cfi_offset w23, -40
684	.cfi_offset w24, -48
685	.cfi_offset w25, -56
686	.cfi_offset w26, -64
687	.cfi_offset w27, -72
688	.cfi_offset w29, -80
689	.cfi_offset b8, -88
690	.cfi_offset b9, -96
691	.cfi_offset b10, -104
692	.cfi_offset b11, -112
693	.cfi_offset b12, -120
694	.cfi_offset b13, -128
695	.cfi_offset b14, -136
696	.cfi_offset b15, -144
697	ldr	x8, [sp, #520]
698	adrp	x11, .LCPI3_1
699	ldrb	w9, [sp, #512]
700	adrp	x10, .LCPI3_2
701	cmp	x1, #4
702	b.lo	.LBB3_6
703	adrp	x12, .LCPI3_0
704	sbfx	w13, w5, #0, #1
705	mov	w15, #58983
706	mov	w16, #44677
707	movk	w15, #27145, lsl #16
708	movk	w16, #47975, lsl #16
709	ldr	q0, [x12, :lo12:.LCPI3_0]
710	dup	v1.4s, w13
711	movi	v13.4s, #64
712	mov	w13, #62322
713	mov	w14, #62778
714	orr	w12, w7, w6
715	and	v0.16b, v1.16b, v0.16b
716	ldr	q1, [x11, :lo12:.LCPI3_1]
717	movk	w13, #15470, lsl #16
718	movk	w14, #42319, lsl #16
719	dup	v14.4s, w15
720	stp	q0, q1, [sp, #16]
721	orr	v0.4s, #128, lsl #24
722	str	q0, [sp]
723	dup	v0.4s, w16
724	stp	q0, q14, [sp, #48]
725	b	.LBB3_3
726.LBB3_2:
727	zip1	v0.4s, v29.4s, v8.4s
728	add	x15, x4, #4
729	zip1	v1.4s, v30.4s, v31.4s
730	tst	w5, #0x1
731	zip1	v2.4s, v24.4s, v18.4s
732	csel	x4, x15, x4, ne
733	zip1	v3.4s, v25.4s, v26.4s
734	add	x0, x0, #32
735	zip2	v6.4s, v29.4s, v8.4s
736	sub	x1, x1, #4
737	zip1	v4.2d, v0.2d, v1.2d
738	cmp	x1, #3
739	zip2	v7.4s, v30.4s, v31.4s
740	zip1	v5.2d, v2.2d, v3.2d
741	zip2	v0.2d, v0.2d, v1.2d
742	zip2	v1.2d, v2.2d, v3.2d
743	zip2	v2.4s, v24.4s, v18.4s
744	zip2	v3.4s, v25.4s, v26.4s
745	stp	q4, q5, [x8]
746	zip2	v4.2d, v6.2d, v7.2d
747	stp	q0, q1, [x8, #32]
748	zip1	v0.2d, v6.2d, v7.2d
749	zip1	v1.2d, v2.2d, v3.2d
750	zip2	v2.2d, v2.2d, v3.2d
751	stp	q0, q1, [x8, #64]
752	stp	q4, q2, [x8, #96]
753	add	x8, x8, #128
754	b.ls	.LBB3_6
755.LBB3_3:
756	mov	x15, x3
757	add	x16, x3, #8
758	add	x17, x3, #12
759	add	x19, x3, #16
760	add	x20, x3, #20
761	ld1r	{ v29.4s }, [x15], #4
762	ld1r	{ v30.4s }, [x16]
763	add	x16, x3, #24
764	ld1r	{ v31.4s }, [x17]
765	add	x17, x3, #28
766	ld1r	{ v24.4s }, [x19]
767	ld1r	{ v18.4s }, [x20]
768	ld1r	{ v25.4s }, [x16]
769	ld1r	{ v8.4s }, [x15]
770	ld1r	{ v26.4s }, [x17]
771	cbz	x2, .LBB3_2
772	ldr	q1, [sp, #16]
773	dup	v0.4s, w4
774	lsr	x17, x4, #32
775	mov	x15, xzr
776	ldp	x19, x20, [x0, #16]
777	add	v1.4s, v0.4s, v1.4s
778	mov	x21, x2
779	movi	v0.4s, #128, lsl #24
780	mov	w26, w12
781	str	q1, [sp, #96]
782	eor	v0.16b, v1.16b, v0.16b
783	ldr	q1, [sp]
784	cmgt	v0.4s, v1.4s, v0.4s
785	dup	v1.4s, w17
786	ldp	x16, x17, [x0]
787	sub	v0.4s, v1.4s, v0.4s
788	str	q0, [sp, #80]
789.LBB3_5:
790	add	x23, x16, x15
791	add	x24, x17, x15
792	add	x22, x19, x15
793	add	x25, x20, x15
794	subs	x21, x21, #1
795	add	x15, x15, #64
796	ldp	q1, q2, [x23]
797	csel	w27, w9, wzr, eq
798	orr	w26, w27, w26
799	and	w26, w26, #0xff
800	ldp	q4, q5, [x24]
801	dup	v0.4s, w26
802	mov	w26, w6
803	zip1	v22.4s, v1.4s, v4.4s
804	zip2	v20.4s, v1.4s, v4.4s
805	ldp	q6, q7, [x22]
806	zip1	v17.4s, v2.4s, v5.4s
807	zip2	v23.4s, v2.4s, v5.4s
808	ldp	q16, q21, [x25]
809	zip1	v19.4s, v6.4s, v16.4s
810	zip2	v1.4s, v6.4s, v16.4s
811	ldp	q27, q28, [x23, #32]
812	zip1	v4.4s, v7.4s, v21.4s
813	zip2	v5.4s, v7.4s, v21.4s
814	zip2	v15.2d, v17.2d, v4.2d
815	ldp	q9, q10, [x24, #32]
816	mov	v17.d[1], v4.d[0]
817	add	v4.4s, v30.4s, v25.4s
818	zip2	v11.2d, v23.2d, v5.2d
819	zip2	v3.4s, v27.4s, v9.4s
820	zip1	v7.4s, v27.4s, v9.4s
821	ldp	q12, q6, [x22, #32]
822	mov	v23.d[1], v5.d[0]
823	stp	q11, q3, [sp, #256]
824	add	v5.4s, v31.4s, v26.4s
825	add	v4.4s, v4.4s, v17.4s
826	str	q23, [sp, #352]
827	ldp	q16, q2, [x25, #32]
828	add	v5.4s, v5.4s, v23.4s
829	zip1	v3.4s, v12.4s, v16.4s
830	eor	v0.16b, v5.16b, v0.16b
831	zip1	v9.4s, v6.4s, v2.4s
832	zip2	v2.4s, v6.4s, v2.4s
833	stp	q7, q3, [sp, #208]
834	zip2	v3.4s, v12.4s, v16.4s
835	zip1	v12.4s, v28.4s, v10.4s
836	zip2	v10.4s, v28.4s, v10.4s
837	stp	q17, q2, [sp, #160]
838	zip2	v28.2d, v22.2d, v19.2d
839	mov	v22.d[1], v19.d[0]
840	str	q3, [sp, #240]
841	add	v2.4s, v8.4s, v18.4s
842	eor	v16.16b, v4.16b, v13.16b
843	dup	v17.4s, w13
844	mov	v3.16b, v22.16b
845	stp	q22, q28, [sp, #320]
846	zip2	v22.2d, v20.2d, v1.2d
847	mov	v20.d[1], v1.d[0]
848	add	v1.4s, v29.4s, v24.4s
849	add	v4.4s, v4.4s, v15.4s
850	add	v5.4s, v5.4s, v11.4s
851	add	v2.4s, v2.4s, v20.4s
852	stp	q15, q20, [sp, #288]
853	add	v1.4s, v1.4s, v3.4s
854	ldr	q3, [sp, #96]
855	dup	v20.4s, w14
856	mov	v23.16b, v22.16b
857	mov	v15.16b, v10.16b
858	eor	v6.16b, v1.16b, v3.16b
859	ldr	q3, [sp, #80]
860	add	v1.4s, v1.4s, v28.4s
861	ldr	q28, [sp, #272]
862	str	q23, [sp, #128]
863	eor	v7.16b, v2.16b, v3.16b
864	ldp	q27, q3, [sp, #32]
865	add	v2.4s, v2.4s, v22.4s
866	tbl	v6.16b, { v6.16b }, v27.16b
867	tbl	v7.16b, { v7.16b }, v27.16b
868	tbl	v16.16b, { v16.16b }, v27.16b
869	tbl	v0.16b, { v0.16b }, v27.16b
870	add	v19.4s, v6.4s, v14.4s
871	add	v21.4s, v7.4s, v3.4s
872	add	v30.4s, v16.4s, v17.4s
873	add	v31.4s, v0.4s, v20.4s
874	eor	v24.16b, v19.16b, v24.16b
875	eor	v17.16b, v21.16b, v18.16b
876	ushr	v18.4s, v24.4s, #12
877	shl	v20.4s, v24.4s, #20
878	eor	v24.16b, v30.16b, v25.16b
879	eor	v25.16b, v31.16b, v26.16b
880	ushr	v26.4s, v17.4s, #12
881	shl	v17.4s, v17.4s, #20
882	ushr	v29.4s, v24.4s, #12
883	shl	v24.4s, v24.4s, #20
884	ushr	v8.4s, v25.4s, #12
885	shl	v25.4s, v25.4s, #20
886	orr	v3.16b, v20.16b, v18.16b
887	ldr	q18, [x10, :lo12:.LCPI3_2]
888	orr	v13.16b, v17.16b, v26.16b
889	orr	v24.16b, v24.16b, v29.16b
890	orr	v14.16b, v25.16b, v8.16b
891	add	v8.4s, v1.4s, v3.4s
892	add	v29.4s, v2.4s, v13.4s
893	add	v17.4s, v4.4s, v24.4s
894	add	v20.4s, v5.4s, v14.4s
895	eor	v1.16b, v6.16b, v8.16b
896	eor	v2.16b, v7.16b, v29.16b
897	eor	v4.16b, v16.16b, v17.16b
898	eor	v0.16b, v0.16b, v20.16b
899	tbl	v25.16b, { v1.16b }, v18.16b
900	tbl	v16.16b, { v2.16b }, v18.16b
901	tbl	v6.16b, { v4.16b }, v18.16b
902	tbl	v4.16b, { v0.16b }, v18.16b
903	add	v19.4s, v19.4s, v25.4s
904	add	v21.4s, v21.4s, v16.4s
905	add	v26.4s, v30.4s, v6.4s
906	add	v7.4s, v31.4s, v4.4s
907	eor	v0.16b, v19.16b, v3.16b
908	eor	v1.16b, v21.16b, v13.16b
909	eor	v2.16b, v26.16b, v24.16b
910	eor	v3.16b, v7.16b, v14.16b
911	ushr	v5.4s, v0.4s, #7
912	shl	v0.4s, v0.4s, #25
913	ushr	v24.4s, v1.4s, #7
914	shl	v1.4s, v1.4s, #25
915	ushr	v30.4s, v2.4s, #7
916	shl	v2.4s, v2.4s, #25
917	orr	v5.16b, v0.16b, v5.16b
918	orr	v0.16b, v1.16b, v24.16b
919	ushr	v31.4s, v3.4s, #7
920	orr	v2.16b, v2.16b, v30.16b
921	ldp	q24, q30, [sp, #208]
922	shl	v3.4s, v3.4s, #25
923	zip2	v14.2d, v12.2d, v9.2d
924	mov	v22.16b, v24.16b
925	orr	v1.16b, v3.16b, v31.16b
926	zip2	v3.2d, v24.2d, v30.2d
927	mov	v24.16b, v28.16b
928	mov	v22.d[1], v30.d[0]
929	ldr	q30, [sp, #240]
930	mov	v31.16b, v12.16b
931	stp	q22, q14, [sp, #224]
932	mov	v24.d[1], v30.d[0]
933	add	v12.4s, v8.4s, v22.4s
934	mov	v31.d[1], v9.d[0]
935	add	v22.4s, v29.4s, v24.4s
936	ldr	q29, [sp, #176]
937	zip2	v28.2d, v28.2d, v30.2d
938	mov	v9.16b, v24.16b
939	mov	v15.d[1], v29.d[0]
940	zip2	v8.2d, v10.2d, v29.2d
941	add	v10.4s, v12.4s, v0.4s
942	add	v22.4s, v22.4s, v2.4s
943	str	q9, [sp, #144]
944	add	v20.4s, v20.4s, v15.4s
945	add	v17.4s, v17.4s, v31.4s
946	stp	q3, q8, [sp, #192]
947	eor	v4.16b, v4.16b, v10.16b
948	eor	v25.16b, v25.16b, v22.16b
949	add	v20.4s, v20.4s, v5.4s
950	add	v17.4s, v17.4s, v1.4s
951	tbl	v4.16b, { v4.16b }, v27.16b
952	tbl	v25.16b, { v25.16b }, v27.16b
953	eor	v6.16b, v6.16b, v20.16b
954	eor	v16.16b, v16.16b, v17.16b
955	add	v26.4s, v26.4s, v4.4s
956	add	v7.4s, v7.4s, v25.4s
957	tbl	v6.16b, { v6.16b }, v27.16b
958	tbl	v16.16b, { v16.16b }, v27.16b
959	eor	v0.16b, v26.16b, v0.16b
960	eor	v2.16b, v7.16b, v2.16b
961	add	v21.4s, v21.4s, v6.4s
962	add	v19.4s, v19.4s, v16.4s
963	ushr	v12.4s, v0.4s, #12
964	shl	v0.4s, v0.4s, #20
965	ushr	v13.4s, v2.4s, #12
966	shl	v2.4s, v2.4s, #20
967	eor	v5.16b, v21.16b, v5.16b
968	eor	v1.16b, v19.16b, v1.16b
969	orr	v0.16b, v0.16b, v12.16b
970	add	v10.4s, v10.4s, v3.4s
971	orr	v2.16b, v2.16b, v13.16b
972	ushr	v13.4s, v5.4s, #12
973	shl	v5.4s, v5.4s, #20
974	add	v22.4s, v22.4s, v28.4s
975	ushr	v12.4s, v1.4s, #12
976	shl	v1.4s, v1.4s, #20
977	add	v10.4s, v10.4s, v0.4s
978	orr	v5.16b, v5.16b, v13.16b
979	add	v22.4s, v22.4s, v2.4s
980	add	v20.4s, v20.4s, v8.4s
981	orr	v1.16b, v1.16b, v12.16b
982	add	v17.4s, v17.4s, v14.4s
983	eor	v4.16b, v4.16b, v10.16b
984	eor	v25.16b, v25.16b, v22.16b
985	add	v20.4s, v20.4s, v5.4s
986	add	v17.4s, v17.4s, v1.4s
987	tbl	v4.16b, { v4.16b }, v18.16b
988	tbl	v25.16b, { v25.16b }, v18.16b
989	eor	v6.16b, v6.16b, v20.16b
990	eor	v16.16b, v16.16b, v17.16b
991	add	v26.4s, v26.4s, v4.4s
992	add	v7.4s, v7.4s, v25.4s
993	tbl	v6.16b, { v6.16b }, v18.16b
994	tbl	v16.16b, { v16.16b }, v18.16b
995	eor	v0.16b, v26.16b, v0.16b
996	eor	v2.16b, v7.16b, v2.16b
997	add	v21.4s, v21.4s, v6.4s
998	add	v19.4s, v19.4s, v16.4s
999	ushr	v12.4s, v0.4s, #7
1000	shl	v0.4s, v0.4s, #25
1001	ushr	v13.4s, v2.4s, #7
1002	shl	v2.4s, v2.4s, #25
1003	eor	v5.16b, v21.16b, v5.16b
1004	eor	v1.16b, v19.16b, v1.16b
1005	orr	v0.16b, v0.16b, v12.16b
1006	add	v22.4s, v22.4s, v23.4s
1007	orr	v2.16b, v2.16b, v13.16b
1008	ushr	v13.4s, v5.4s, #7
1009	shl	v5.4s, v5.4s, #25
1010	add	v17.4s, v17.4s, v11.4s
1011	mov	v30.16b, v28.16b
1012	mov	v28.16b, v23.16b
1013	ldr	q23, [sp, #304]
1014	ushr	v12.4s, v1.4s, #7
1015	shl	v1.4s, v1.4s, #25
1016	add	v22.4s, v22.4s, v0.4s
1017	mov	v29.16b, v31.16b
1018	ldr	q31, [sp, #160]
1019	orr	v5.16b, v5.16b, v13.16b
1020	add	v17.4s, v17.4s, v2.4s
1021	add	v10.4s, v10.4s, v23.4s
1022	orr	v1.16b, v1.16b, v12.16b
1023	str	q29, [sp, #272]
1024	eor	v16.16b, v16.16b, v22.16b
1025	add	v20.4s, v20.4s, v31.4s
1026	eor	v6.16b, v6.16b, v17.16b
1027	add	v10.4s, v10.4s, v5.4s
1028	tbl	v16.16b, { v16.16b }, v27.16b
1029	add	v20.4s, v20.4s, v1.4s
1030	tbl	v6.16b, { v6.16b }, v27.16b
1031	eor	v25.16b, v25.16b, v10.16b
1032	add	v21.4s, v21.4s, v16.4s
1033	eor	v4.16b, v4.16b, v20.16b
1034	add	v26.4s, v26.4s, v6.4s
1035	tbl	v25.16b, { v25.16b }, v27.16b
1036	eor	v0.16b, v21.16b, v0.16b
1037	tbl	v4.16b, { v4.16b }, v27.16b
1038	eor	v2.16b, v26.16b, v2.16b
1039	add	v19.4s, v19.4s, v25.4s
1040	ushr	v12.4s, v0.4s, #12
1041	shl	v0.4s, v0.4s, #20
1042	add	v7.4s, v7.4s, v4.4s
1043	ushr	v13.4s, v2.4s, #12
1044	shl	v2.4s, v2.4s, #20
1045	eor	v5.16b, v5.16b, v19.16b
1046	add	v22.4s, v22.4s, v24.4s
1047	ldr	q24, [sp, #320]
1048	orr	v0.16b, v0.16b, v12.16b
1049	eor	v1.16b, v7.16b, v1.16b
1050	orr	v2.16b, v2.16b, v13.16b
1051	ushr	v12.4s, v5.4s, #12
1052	shl	v5.4s, v5.4s, #20
1053	add	v17.4s, v17.4s, v24.4s
1054	ldr	q24, [sp, #352]
1055	ushr	v13.4s, v1.4s, #12
1056	shl	v1.4s, v1.4s, #20
1057	add	v22.4s, v22.4s, v0.4s
1058	orr	v5.16b, v5.16b, v12.16b
1059	add	v17.4s, v17.4s, v2.4s
1060	add	v10.4s, v10.4s, v24.4s
1061	ldr	q24, [sp, #336]
1062	orr	v1.16b, v1.16b, v13.16b
1063	eor	v16.16b, v16.16b, v22.16b
1064	add	v20.4s, v20.4s, v14.4s
1065	eor	v6.16b, v6.16b, v17.16b
1066	add	v10.4s, v10.4s, v5.4s
1067	tbl	v16.16b, { v16.16b }, v18.16b
1068	add	v20.4s, v20.4s, v1.4s
1069	tbl	v6.16b, { v6.16b }, v18.16b
1070	eor	v25.16b, v25.16b, v10.16b
1071	add	v21.4s, v21.4s, v16.4s
1072	eor	v4.16b, v4.16b, v20.16b
1073	add	v26.4s, v26.4s, v6.4s
1074	tbl	v25.16b, { v25.16b }, v18.16b
1075	eor	v0.16b, v21.16b, v0.16b
1076	tbl	v4.16b, { v4.16b }, v18.16b
1077	eor	v2.16b, v26.16b, v2.16b
1078	add	v19.4s, v19.4s, v25.4s
1079	ushr	v12.4s, v0.4s, #7
1080	shl	v0.4s, v0.4s, #25
1081	add	v7.4s, v7.4s, v4.4s
1082	ushr	v13.4s, v2.4s, #7
1083	shl	v2.4s, v2.4s, #25
1084	eor	v5.16b, v19.16b, v5.16b
1085	orr	v0.16b, v0.16b, v12.16b
1086	eor	v1.16b, v7.16b, v1.16b
1087	add	v10.4s, v10.4s, v24.4s
1088	orr	v2.16b, v2.16b, v13.16b
1089	ushr	v12.4s, v5.4s, #7
1090	shl	v5.4s, v5.4s, #25
1091	add	v22.4s, v22.4s, v29.4s
1092	ushr	v13.4s, v1.4s, #7
1093	shl	v1.4s, v1.4s, #25
1094	add	v10.4s, v10.4s, v0.4s
1095	orr	v5.16b, v5.16b, v12.16b
1096	add	v22.4s, v22.4s, v2.4s
1097	add	v20.4s, v20.4s, v8.4s
1098	ldr	q8, [sp, #288]
1099	orr	v1.16b, v1.16b, v13.16b
1100	add	v17.4s, v17.4s, v3.4s
1101	ldr	q3, [sp, #352]
1102	eor	v4.16b, v4.16b, v10.16b
1103	eor	v25.16b, v25.16b, v22.16b
1104	add	v20.4s, v20.4s, v5.4s
1105	add	v17.4s, v17.4s, v1.4s
1106	tbl	v4.16b, { v4.16b }, v27.16b
1107	tbl	v25.16b, { v25.16b }, v27.16b
1108	eor	v6.16b, v6.16b, v20.16b
1109	eor	v16.16b, v16.16b, v17.16b
1110	add	v26.4s, v26.4s, v4.4s
1111	add	v7.4s, v7.4s, v25.4s
1112	tbl	v6.16b, { v6.16b }, v27.16b
1113	tbl	v16.16b, { v16.16b }, v27.16b
1114	eor	v0.16b, v26.16b, v0.16b
1115	eor	v2.16b, v7.16b, v2.16b
1116	add	v21.4s, v21.4s, v6.4s
1117	add	v19.4s, v19.4s, v16.4s
1118	ushr	v12.4s, v0.4s, #12
1119	shl	v0.4s, v0.4s, #20
1120	ushr	v13.4s, v2.4s, #12
1121	shl	v2.4s, v2.4s, #20
1122	eor	v5.16b, v21.16b, v5.16b
1123	eor	v1.16b, v19.16b, v1.16b
1124	orr	v0.16b, v0.16b, v12.16b
1125	add	v10.4s, v10.4s, v30.4s
1126	orr	v2.16b, v2.16b, v13.16b
1127	ushr	v13.4s, v5.4s, #12
1128	shl	v5.4s, v5.4s, #20
1129	add	v22.4s, v22.4s, v8.4s
1130	mov	v24.16b, v30.16b
1131	mov	v30.16b, v15.16b
1132	add	v17.4s, v17.4s, v15.4s
1133	ldr	q15, [sp, #224]
1134	ushr	v12.4s, v1.4s, #12
1135	shl	v1.4s, v1.4s, #20
1136	add	v10.4s, v10.4s, v0.4s
1137	str	q30, [sp, #176]
1138	orr	v5.16b, v5.16b, v13.16b
1139	add	v22.4s, v22.4s, v2.4s
1140	add	v20.4s, v20.4s, v15.4s
1141	orr	v1.16b, v1.16b, v12.16b
1142	eor	v4.16b, v4.16b, v10.16b
1143	eor	v25.16b, v25.16b, v22.16b
1144	add	v20.4s, v20.4s, v5.4s
1145	add	v17.4s, v17.4s, v1.4s
1146	tbl	v4.16b, { v4.16b }, v18.16b
1147	tbl	v25.16b, { v25.16b }, v18.16b
1148	eor	v6.16b, v6.16b, v20.16b
1149	eor	v16.16b, v16.16b, v17.16b
1150	add	v26.4s, v26.4s, v4.4s
1151	add	v7.4s, v7.4s, v25.4s
1152	tbl	v6.16b, { v6.16b }, v18.16b
1153	tbl	v16.16b, { v16.16b }, v18.16b
1154	eor	v0.16b, v26.16b, v0.16b
1155	eor	v2.16b, v7.16b, v2.16b
1156	add	v21.4s, v21.4s, v6.4s
1157	add	v19.4s, v19.4s, v16.4s
1158	ushr	v12.4s, v0.4s, #7
1159	shl	v0.4s, v0.4s, #25
1160	ushr	v13.4s, v2.4s, #7
1161	shl	v2.4s, v2.4s, #25
1162	eor	v5.16b, v21.16b, v5.16b
1163	eor	v1.16b, v19.16b, v1.16b
1164	orr	v0.16b, v0.16b, v12.16b
1165	add	v22.4s, v22.4s, v9.4s
1166	orr	v2.16b, v2.16b, v13.16b
1167	ushr	v13.4s, v5.4s, #7
1168	shl	v5.4s, v5.4s, #25
1169	add	v17.4s, v17.4s, v14.4s
1170	ushr	v12.4s, v1.4s, #7
1171	shl	v1.4s, v1.4s, #25
1172	add	v22.4s, v22.4s, v0.4s
1173	orr	v5.16b, v5.16b, v13.16b
1174	add	v17.4s, v17.4s, v2.4s
1175	add	v10.4s, v10.4s, v28.4s
1176	orr	v1.16b, v1.16b, v12.16b
1177	eor	v16.16b, v16.16b, v22.16b
1178	add	v20.4s, v20.4s, v11.4s
1179	eor	v6.16b, v6.16b, v17.16b
1180	add	v10.4s, v10.4s, v5.4s
1181	tbl	v16.16b, { v16.16b }, v27.16b
1182	add	v20.4s, v20.4s, v1.4s
1183	tbl	v6.16b, { v6.16b }, v27.16b
1184	eor	v25.16b, v25.16b, v10.16b
1185	add	v21.4s, v21.4s, v16.4s
1186	eor	v4.16b, v4.16b, v20.16b
1187	add	v26.4s, v26.4s, v6.4s
1188	tbl	v25.16b, { v25.16b }, v27.16b
1189	eor	v0.16b, v21.16b, v0.16b
1190	tbl	v4.16b, { v4.16b }, v27.16b
1191	eor	v2.16b, v26.16b, v2.16b
1192	add	v19.4s, v19.4s, v25.4s
1193	ushr	v12.4s, v0.4s, #12
1194	shl	v0.4s, v0.4s, #20
1195	add	v7.4s, v7.4s, v4.4s
1196	ushr	v13.4s, v2.4s, #12
1197	shl	v2.4s, v2.4s, #20
1198	eor	v5.16b, v5.16b, v19.16b
1199	orr	v0.16b, v0.16b, v12.16b
1200	eor	v1.16b, v7.16b, v1.16b
1201	add	v22.4s, v22.4s, v29.4s
1202	orr	v2.16b, v2.16b, v13.16b
1203	ushr	v12.4s, v5.4s, #12
1204	shl	v5.4s, v5.4s, #20
1205	add	v17.4s, v17.4s, v23.4s
1206	ushr	v13.4s, v1.4s, #12
1207	shl	v1.4s, v1.4s, #20
1208	add	v22.4s, v22.4s, v0.4s
1209	orr	v5.16b, v5.16b, v12.16b
1210	add	v17.4s, v17.4s, v2.4s
1211	add	v10.4s, v10.4s, v31.4s
1212	orr	v1.16b, v1.16b, v13.16b
1213	eor	v16.16b, v16.16b, v22.16b
1214	add	v20.4s, v20.4s, v30.4s
1215	eor	v6.16b, v6.16b, v17.16b
1216	add	v10.4s, v10.4s, v5.4s
1217	tbl	v16.16b, { v16.16b }, v18.16b
1218	add	v20.4s, v20.4s, v1.4s
1219	tbl	v6.16b, { v6.16b }, v18.16b
1220	eor	v25.16b, v25.16b, v10.16b
1221	add	v21.4s, v21.4s, v16.4s
1222	eor	v4.16b, v4.16b, v20.16b
1223	add	v26.4s, v26.4s, v6.4s
1224	tbl	v25.16b, { v25.16b }, v18.16b
1225	eor	v0.16b, v21.16b, v0.16b
1226	tbl	v4.16b, { v4.16b }, v18.16b
1227	eor	v2.16b, v26.16b, v2.16b
1228	add	v19.4s, v19.4s, v25.4s
1229	ushr	v12.4s, v0.4s, #7
1230	shl	v0.4s, v0.4s, #25
1231	add	v7.4s, v7.4s, v4.4s
1232	ushr	v13.4s, v2.4s, #7
1233	shl	v2.4s, v2.4s, #25
1234	eor	v5.16b, v19.16b, v5.16b
1235	add	v10.4s, v10.4s, v3.4s
1236	ldr	q3, [sp, #192]
1237	orr	v0.16b, v0.16b, v12.16b
1238	eor	v1.16b, v7.16b, v1.16b
1239	orr	v2.16b, v2.16b, v13.16b
1240	ushr	v12.4s, v5.4s, #7
1241	shl	v5.4s, v5.4s, #25
1242	add	v22.4s, v22.4s, v3.4s
1243	ushr	v13.4s, v1.4s, #7
1244	shl	v1.4s, v1.4s, #25
1245	add	v10.4s, v10.4s, v0.4s
1246	orr	v5.16b, v5.16b, v12.16b
1247	add	v22.4s, v22.4s, v2.4s
1248	add	v20.4s, v20.4s, v15.4s
1249	ldr	q15, [sp, #128]
1250	orr	v1.16b, v1.16b, v13.16b
1251	add	v17.4s, v17.4s, v24.4s
1252	eor	v4.16b, v4.16b, v10.16b
1253	eor	v25.16b, v25.16b, v22.16b
1254	add	v20.4s, v20.4s, v5.4s
1255	add	v17.4s, v17.4s, v1.4s
1256	tbl	v4.16b, { v4.16b }, v27.16b
1257	tbl	v25.16b, { v25.16b }, v27.16b
1258	eor	v6.16b, v6.16b, v20.16b
1259	eor	v16.16b, v16.16b, v17.16b
1260	add	v26.4s, v26.4s, v4.4s
1261	add	v7.4s, v7.4s, v25.4s
1262	tbl	v6.16b, { v6.16b }, v27.16b
1263	tbl	v16.16b, { v16.16b }, v27.16b
1264	eor	v0.16b, v26.16b, v0.16b
1265	eor	v2.16b, v7.16b, v2.16b
1266	add	v21.4s, v21.4s, v6.4s
1267	add	v19.4s, v19.4s, v16.4s
1268	ushr	v12.4s, v0.4s, #12
1269	shl	v0.4s, v0.4s, #20
1270	ushr	v13.4s, v2.4s, #12
1271	shl	v2.4s, v2.4s, #20
1272	eor	v5.16b, v21.16b, v5.16b
1273	ldp	q23, q11, [sp, #320]
1274	eor	v1.16b, v19.16b, v1.16b
1275	orr	v0.16b, v0.16b, v12.16b
1276	add	v10.4s, v10.4s, v8.4s
1277	orr	v2.16b, v2.16b, v13.16b
1278	ushr	v13.4s, v5.4s, #12
1279	shl	v5.4s, v5.4s, #20
1280	add	v22.4s, v22.4s, v23.4s
1281	ushr	v12.4s, v1.4s, #12
1282	shl	v1.4s, v1.4s, #20
1283	add	v10.4s, v10.4s, v0.4s
1284	mov	v28.16b, v31.16b
1285	mov	v31.16b, v8.16b
1286	ldr	q8, [sp, #208]
1287	orr	v5.16b, v5.16b, v13.16b
1288	add	v22.4s, v22.4s, v2.4s
1289	add	v20.4s, v20.4s, v11.4s
1290	orr	v1.16b, v1.16b, v12.16b
1291	add	v17.4s, v17.4s, v8.4s
1292	eor	v4.16b, v4.16b, v10.16b
1293	eor	v25.16b, v25.16b, v22.16b
1294	add	v20.4s, v20.4s, v5.4s
1295	add	v17.4s, v17.4s, v1.4s
1296	tbl	v4.16b, { v4.16b }, v18.16b
1297	tbl	v25.16b, { v25.16b }, v18.16b
1298	eor	v6.16b, v6.16b, v20.16b
1299	eor	v16.16b, v16.16b, v17.16b
1300	add	v26.4s, v26.4s, v4.4s
1301	add	v7.4s, v7.4s, v25.4s
1302	tbl	v6.16b, { v6.16b }, v18.16b
1303	tbl	v16.16b, { v16.16b }, v18.16b
1304	eor	v0.16b, v26.16b, v0.16b
1305	eor	v2.16b, v7.16b, v2.16b
1306	add	v21.4s, v21.4s, v6.4s
1307	add	v19.4s, v19.4s, v16.4s
1308	ushr	v12.4s, v0.4s, #7
1309	shl	v0.4s, v0.4s, #25
1310	ushr	v13.4s, v2.4s, #7
1311	shl	v2.4s, v2.4s, #25
1312	eor	v5.16b, v21.16b, v5.16b
1313	eor	v1.16b, v19.16b, v1.16b
1314	orr	v0.16b, v0.16b, v12.16b
1315	add	v22.4s, v22.4s, v29.4s
1316	orr	v2.16b, v2.16b, v13.16b
1317	ushr	v13.4s, v5.4s, #7
1318	shl	v5.4s, v5.4s, #25
1319	add	v17.4s, v17.4s, v30.4s
1320	ushr	v12.4s, v1.4s, #7
1321	shl	v1.4s, v1.4s, #25
1322	add	v22.4s, v22.4s, v0.4s
1323	orr	v5.16b, v5.16b, v13.16b
1324	add	v17.4s, v17.4s, v2.4s
1325	add	v10.4s, v10.4s, v9.4s
1326	orr	v1.16b, v1.16b, v12.16b
1327	eor	v16.16b, v16.16b, v22.16b
1328	add	v20.4s, v20.4s, v14.4s
1329	ldr	q14, [sp, #256]
1330	eor	v6.16b, v6.16b, v17.16b
1331	add	v10.4s, v10.4s, v5.4s
1332	tbl	v16.16b, { v16.16b }, v27.16b
1333	add	v20.4s, v20.4s, v1.4s
1334	tbl	v6.16b, { v6.16b }, v27.16b
1335	eor	v25.16b, v25.16b, v10.16b
1336	add	v21.4s, v21.4s, v16.4s
1337	eor	v4.16b, v4.16b, v20.16b
1338	add	v26.4s, v26.4s, v6.4s
1339	tbl	v25.16b, { v25.16b }, v27.16b
1340	eor	v0.16b, v21.16b, v0.16b
1341	tbl	v4.16b, { v4.16b }, v27.16b
1342	eor	v2.16b, v26.16b, v2.16b
1343	add	v19.4s, v19.4s, v25.4s
1344	ushr	v12.4s, v0.4s, #12
1345	shl	v0.4s, v0.4s, #20
1346	add	v7.4s, v7.4s, v4.4s
1347	ushr	v13.4s, v2.4s, #12
1348	shl	v2.4s, v2.4s, #20
1349	eor	v5.16b, v5.16b, v19.16b
1350	orr	v0.16b, v0.16b, v12.16b
1351	eor	v1.16b, v7.16b, v1.16b
1352	add	v22.4s, v22.4s, v3.4s
1353	orr	v2.16b, v2.16b, v13.16b
1354	ushr	v12.4s, v5.4s, #12
1355	shl	v5.4s, v5.4s, #20
1356	add	v17.4s, v17.4s, v15.4s
1357	ushr	v13.4s, v1.4s, #12
1358	shl	v1.4s, v1.4s, #20
1359	add	v22.4s, v22.4s, v0.4s
1360	orr	v5.16b, v5.16b, v12.16b
1361	add	v17.4s, v17.4s, v2.4s
1362	add	v10.4s, v10.4s, v14.4s
1363	orr	v1.16b, v1.16b, v13.16b
1364	eor	v16.16b, v16.16b, v22.16b
1365	add	v20.4s, v20.4s, v8.4s
1366	eor	v6.16b, v6.16b, v17.16b
1367	add	v10.4s, v10.4s, v5.4s
1368	tbl	v16.16b, { v16.16b }, v18.16b
1369	add	v20.4s, v20.4s, v1.4s
1370	tbl	v6.16b, { v6.16b }, v18.16b
1371	eor	v25.16b, v25.16b, v10.16b
1372	add	v21.4s, v21.4s, v16.4s
1373	eor	v4.16b, v4.16b, v20.16b
1374	add	v26.4s, v26.4s, v6.4s
1375	tbl	v25.16b, { v25.16b }, v18.16b
1376	eor	v0.16b, v21.16b, v0.16b
1377	tbl	v4.16b, { v4.16b }, v18.16b
1378	eor	v2.16b, v26.16b, v2.16b
1379	add	v19.4s, v19.4s, v25.4s
1380	ushr	v12.4s, v0.4s, #7
1381	shl	v0.4s, v0.4s, #25
1382	add	v7.4s, v7.4s, v4.4s
1383	ushr	v13.4s, v2.4s, #7
1384	shl	v2.4s, v2.4s, #25
1385	eor	v5.16b, v19.16b, v5.16b
1386	orr	v0.16b, v0.16b, v12.16b
1387	eor	v1.16b, v7.16b, v1.16b
1388	add	v10.4s, v10.4s, v28.4s
1389	orr	v2.16b, v2.16b, v13.16b
1390	ushr	v12.4s, v5.4s, #7
1391	shl	v5.4s, v5.4s, #25
1392	add	v22.4s, v22.4s, v24.4s
1393	ushr	v13.4s, v1.4s, #7
1394	shl	v1.4s, v1.4s, #25
1395	add	v10.4s, v10.4s, v0.4s
1396	orr	v5.16b, v5.16b, v12.16b
1397	add	v22.4s, v22.4s, v2.4s
1398	add	v20.4s, v20.4s, v11.4s
1399	ldr	q11, [sp, #304]
1400	orr	v1.16b, v1.16b, v13.16b
1401	add	v17.4s, v17.4s, v31.4s
1402	ldr	q31, [sp, #224]
1403	eor	v4.16b, v4.16b, v10.16b
1404	eor	v25.16b, v25.16b, v22.16b
1405	add	v20.4s, v20.4s, v5.4s
1406	add	v17.4s, v17.4s, v1.4s
1407	tbl	v4.16b, { v4.16b }, v27.16b
1408	tbl	v25.16b, { v25.16b }, v27.16b
1409	eor	v6.16b, v6.16b, v20.16b
1410	eor	v16.16b, v16.16b, v17.16b
1411	add	v26.4s, v26.4s, v4.4s
1412	add	v7.4s, v7.4s, v25.4s
1413	tbl	v6.16b, { v6.16b }, v27.16b
1414	tbl	v16.16b, { v16.16b }, v27.16b
1415	eor	v0.16b, v26.16b, v0.16b
1416	eor	v2.16b, v7.16b, v2.16b
1417	add	v21.4s, v21.4s, v6.4s
1418	add	v19.4s, v19.4s, v16.4s
1419	ushr	v12.4s, v0.4s, #12
1420	shl	v0.4s, v0.4s, #20
1421	ushr	v13.4s, v2.4s, #12
1422	shl	v2.4s, v2.4s, #20
1423	eor	v5.16b, v21.16b, v5.16b
1424	eor	v1.16b, v19.16b, v1.16b
1425	orr	v0.16b, v0.16b, v12.16b
1426	add	v10.4s, v10.4s, v23.4s
1427	ldr	q23, [sp, #240]
1428	orr	v2.16b, v2.16b, v13.16b
1429	ushr	v13.4s, v5.4s, #12
1430	shl	v5.4s, v5.4s, #20
1431	add	v22.4s, v22.4s, v11.4s
1432	mov	v30.16b, v8.16b
1433	mov	v8.16b, v24.16b
1434	ldr	q24, [sp, #352]
1435	ushr	v12.4s, v1.4s, #12
1436	shl	v1.4s, v1.4s, #20
1437	add	v10.4s, v10.4s, v0.4s
1438	orr	v5.16b, v5.16b, v13.16b
1439	str	q8, [sp, #112]
1440	add	v22.4s, v22.4s, v2.4s
1441	add	v20.4s, v20.4s, v24.4s
1442	orr	v1.16b, v1.16b, v12.16b
1443	add	v17.4s, v17.4s, v31.4s
1444	eor	v4.16b, v4.16b, v10.16b
1445	eor	v25.16b, v25.16b, v22.16b
1446	add	v20.4s, v20.4s, v5.4s
1447	add	v17.4s, v17.4s, v1.4s
1448	tbl	v4.16b, { v4.16b }, v18.16b
1449	tbl	v25.16b, { v25.16b }, v18.16b
1450	eor	v6.16b, v6.16b, v20.16b
1451	eor	v16.16b, v16.16b, v17.16b
1452	add	v26.4s, v26.4s, v4.4s
1453	add	v7.4s, v7.4s, v25.4s
1454	tbl	v6.16b, { v6.16b }, v18.16b
1455	tbl	v16.16b, { v16.16b }, v18.16b
1456	eor	v0.16b, v26.16b, v0.16b
1457	eor	v2.16b, v7.16b, v2.16b
1458	add	v21.4s, v21.4s, v6.4s
1459	mov	v29.16b, v3.16b
1460	add	v19.4s, v19.4s, v16.4s
1461	ushr	v12.4s, v0.4s, #7
1462	shl	v0.4s, v0.4s, #25
1463	ushr	v13.4s, v2.4s, #7
1464	shl	v2.4s, v2.4s, #25
1465	eor	v5.16b, v21.16b, v5.16b
1466	eor	v1.16b, v19.16b, v1.16b
1467	orr	v0.16b, v0.16b, v12.16b
1468	add	v22.4s, v22.4s, v29.4s
1469	orr	v2.16b, v2.16b, v13.16b
1470	ushr	v13.4s, v5.4s, #7
1471	shl	v5.4s, v5.4s, #25
1472	add	v17.4s, v17.4s, v30.4s
1473	ldr	q30, [sp, #272]
1474	ushr	v12.4s, v1.4s, #7
1475	shl	v1.4s, v1.4s, #25
1476	add	v22.4s, v22.4s, v0.4s
1477	mov	v3.16b, v28.16b
1478	ldr	q28, [sp, #176]
1479	orr	v5.16b, v5.16b, v13.16b
1480	add	v17.4s, v17.4s, v2.4s
1481	add	v10.4s, v10.4s, v30.4s
1482	orr	v1.16b, v1.16b, v12.16b
1483	eor	v16.16b, v16.16b, v22.16b
1484	add	v20.4s, v20.4s, v28.4s
1485	eor	v6.16b, v6.16b, v17.16b
1486	add	v10.4s, v10.4s, v5.4s
1487	tbl	v16.16b, { v16.16b }, v27.16b
1488	add	v20.4s, v20.4s, v1.4s
1489	tbl	v6.16b, { v6.16b }, v27.16b
1490	eor	v25.16b, v25.16b, v10.16b
1491	add	v21.4s, v21.4s, v16.4s
1492	eor	v4.16b, v4.16b, v20.16b
1493	add	v26.4s, v26.4s, v6.4s
1494	tbl	v25.16b, { v25.16b }, v27.16b
1495	eor	v0.16b, v21.16b, v0.16b
1496	tbl	v4.16b, { v4.16b }, v27.16b
1497	eor	v2.16b, v26.16b, v2.16b
1498	add	v19.4s, v19.4s, v25.4s
1499	ushr	v12.4s, v0.4s, #12
1500	shl	v0.4s, v0.4s, #20
1501	add	v7.4s, v7.4s, v4.4s
1502	ushr	v13.4s, v2.4s, #12
1503	shl	v2.4s, v2.4s, #20
1504	eor	v5.16b, v5.16b, v19.16b
1505	orr	v0.16b, v0.16b, v12.16b
1506	eor	v1.16b, v7.16b, v1.16b
1507	add	v22.4s, v22.4s, v8.4s
1508	orr	v2.16b, v2.16b, v13.16b
1509	ushr	v12.4s, v5.4s, #12
1510	shl	v5.4s, v5.4s, #20
1511	add	v17.4s, v17.4s, v9.4s
1512	ldr	q9, [sp, #320]
1513	ushr	v13.4s, v1.4s, #12
1514	shl	v1.4s, v1.4s, #20
1515	add	v22.4s, v22.4s, v0.4s
1516	orr	v5.16b, v5.16b, v12.16b
1517	add	v17.4s, v17.4s, v2.4s
1518	add	v10.4s, v10.4s, v23.4s
1519	orr	v1.16b, v1.16b, v13.16b
1520	eor	v16.16b, v16.16b, v22.16b
1521	add	v20.4s, v20.4s, v31.4s
1522	eor	v6.16b, v6.16b, v17.16b
1523	add	v10.4s, v10.4s, v5.4s
1524	tbl	v16.16b, { v16.16b }, v18.16b
1525	add	v20.4s, v20.4s, v1.4s
1526	tbl	v6.16b, { v6.16b }, v18.16b
1527	eor	v25.16b, v25.16b, v10.16b
1528	add	v21.4s, v21.4s, v16.4s
1529	eor	v4.16b, v4.16b, v20.16b
1530	add	v26.4s, v26.4s, v6.4s
1531	tbl	v25.16b, { v25.16b }, v18.16b
1532	eor	v0.16b, v21.16b, v0.16b
1533	tbl	v4.16b, { v4.16b }, v18.16b
1534	eor	v2.16b, v26.16b, v2.16b
1535	add	v19.4s, v19.4s, v25.4s
1536	ushr	v12.4s, v0.4s, #7
1537	shl	v0.4s, v0.4s, #25
1538	add	v7.4s, v7.4s, v4.4s
1539	ushr	v13.4s, v2.4s, #7
1540	shl	v2.4s, v2.4s, #25
1541	eor	v5.16b, v19.16b, v5.16b
1542	add	v10.4s, v10.4s, v14.4s
1543	ldr	q14, [sp, #288]
1544	orr	v0.16b, v0.16b, v12.16b
1545	eor	v1.16b, v7.16b, v1.16b
1546	orr	v2.16b, v2.16b, v13.16b
1547	ushr	v12.4s, v5.4s, #7
1548	shl	v5.4s, v5.4s, #25
1549	add	v22.4s, v22.4s, v14.4s
1550	ushr	v13.4s, v1.4s, #7
1551	shl	v1.4s, v1.4s, #25
1552	add	v10.4s, v10.4s, v0.4s
1553	orr	v5.16b, v5.16b, v12.16b
1554	add	v22.4s, v22.4s, v2.4s
1555	add	v20.4s, v20.4s, v24.4s
1556	orr	v1.16b, v1.16b, v13.16b
1557	eor	v4.16b, v4.16b, v10.16b
1558	add	v17.4s, v17.4s, v9.4s
1559	eor	v25.16b, v25.16b, v22.16b
1560	add	v20.4s, v20.4s, v5.4s
1561	tbl	v4.16b, { v4.16b }, v27.16b
1562	add	v17.4s, v17.4s, v1.4s
1563	tbl	v25.16b, { v25.16b }, v27.16b
1564	eor	v6.16b, v6.16b, v20.16b
1565	add	v26.4s, v26.4s, v4.4s
1566	eor	v16.16b, v16.16b, v17.16b
1567	add	v7.4s, v7.4s, v25.4s
1568	tbl	v6.16b, { v6.16b }, v27.16b
1569	eor	v0.16b, v26.16b, v0.16b
1570	tbl	v16.16b, { v16.16b }, v27.16b
1571	eor	v2.16b, v7.16b, v2.16b
1572	add	v21.4s, v21.4s, v6.4s
1573	ushr	v12.4s, v0.4s, #12
1574	shl	v0.4s, v0.4s, #20
1575	add	v19.4s, v19.4s, v16.4s
1576	ushr	v13.4s, v2.4s, #12
1577	shl	v2.4s, v2.4s, #20
1578	eor	v5.16b, v21.16b, v5.16b
1579	orr	v0.16b, v0.16b, v12.16b
1580	eor	v1.16b, v19.16b, v1.16b
1581	add	v10.4s, v10.4s, v11.4s
1582	orr	v2.16b, v2.16b, v13.16b
1583	ushr	v13.4s, v5.4s, #12
1584	shl	v5.4s, v5.4s, #20
1585	ushr	v12.4s, v1.4s, #12
1586	shl	v1.4s, v1.4s, #20
1587	add	v10.4s, v10.4s, v0.4s
1588	add	v22.4s, v22.4s, v15.4s
1589	orr	v5.16b, v5.16b, v13.16b
1590	add	v20.4s, v20.4s, v3.4s
1591	mov	v24.16b, v3.16b
1592	ldr	q3, [sp, #336]
1593	orr	v1.16b, v1.16b, v12.16b
1594	eor	v4.16b, v4.16b, v10.16b
1595	add	v22.4s, v22.4s, v2.4s
1596	add	v17.4s, v17.4s, v3.4s
1597	add	v20.4s, v20.4s, v5.4s
1598	tbl	v4.16b, { v4.16b }, v18.16b
1599	eor	v25.16b, v25.16b, v22.16b
1600	add	v17.4s, v17.4s, v1.4s
1601	eor	v6.16b, v6.16b, v20.16b
1602	add	v26.4s, v26.4s, v4.4s
1603	tbl	v25.16b, { v25.16b }, v18.16b
1604	eor	v16.16b, v16.16b, v17.16b
1605	tbl	v6.16b, { v6.16b }, v18.16b
1606	eor	v0.16b, v26.16b, v0.16b
1607	add	v7.4s, v7.4s, v25.4s
1608	tbl	v16.16b, { v16.16b }, v18.16b
1609	add	v21.4s, v21.4s, v6.4s
1610	ushr	v12.4s, v0.4s, #7
1611	shl	v0.4s, v0.4s, #25
1612	eor	v2.16b, v7.16b, v2.16b
1613	add	v19.4s, v19.4s, v16.4s
1614	eor	v5.16b, v21.16b, v5.16b
1615	orr	v0.16b, v0.16b, v12.16b
1616	ushr	v12.4s, v2.4s, #7
1617	shl	v2.4s, v2.4s, #25
1618	eor	v1.16b, v19.16b, v1.16b
1619	ushr	v13.4s, v5.4s, #7
1620	shl	v5.4s, v5.4s, #25
1621	add	v22.4s, v22.4s, v8.4s
1622	orr	v2.16b, v2.16b, v12.16b
1623	ushr	v12.4s, v1.4s, #7
1624	shl	v1.4s, v1.4s, #25
1625	orr	v5.16b, v5.16b, v13.16b
1626	add	v22.4s, v22.4s, v0.4s
1627	add	v10.4s, v10.4s, v29.4s
1628	ldr	q29, [sp, #208]
1629	add	v17.4s, v17.4s, v31.4s
1630	orr	v1.16b, v1.16b, v12.16b
1631	add	v20.4s, v20.4s, v29.4s
1632	eor	v16.16b, v16.16b, v22.16b
1633	add	v10.4s, v10.4s, v5.4s
1634	add	v17.4s, v17.4s, v2.4s
1635	add	v20.4s, v20.4s, v1.4s
1636	tbl	v16.16b, { v16.16b }, v27.16b
1637	eor	v25.16b, v25.16b, v10.16b
1638	eor	v6.16b, v6.16b, v17.16b
1639	eor	v4.16b, v4.16b, v20.16b
1640	add	v21.4s, v21.4s, v16.4s
1641	tbl	v25.16b, { v25.16b }, v27.16b
1642	tbl	v6.16b, { v6.16b }, v27.16b
1643	tbl	v4.16b, { v4.16b }, v27.16b
1644	eor	v0.16b, v21.16b, v0.16b
1645	add	v19.4s, v19.4s, v25.4s
1646	add	v26.4s, v26.4s, v6.4s
1647	add	v7.4s, v7.4s, v4.4s
1648	ushr	v12.4s, v0.4s, #12
1649	shl	v0.4s, v0.4s, #20
1650	eor	v5.16b, v5.16b, v19.16b
1651	eor	v2.16b, v26.16b, v2.16b
1652	eor	v1.16b, v7.16b, v1.16b
1653	orr	v0.16b, v0.16b, v12.16b
1654	ushr	v12.4s, v5.4s, #12
1655	shl	v5.4s, v5.4s, #20
1656	add	v22.4s, v22.4s, v14.4s
1657	mov	v8.16b, v31.16b
1658	ushr	v13.4s, v2.4s, #12
1659	shl	v2.4s, v2.4s, #20
1660	mov	v31.16b, v14.16b
1661	ushr	v14.4s, v1.4s, #12
1662	shl	v1.4s, v1.4s, #20
1663	orr	v5.16b, v5.16b, v12.16b
1664	add	v22.4s, v22.4s, v0.4s
1665	add	v10.4s, v10.4s, v28.4s
1666	ldr	q28, [sp, #352]
1667	orr	v2.16b, v2.16b, v13.16b
1668	orr	v1.16b, v1.16b, v14.16b
1669	add	v17.4s, v17.4s, v30.4s
1670	add	v20.4s, v20.4s, v3.4s
1671	eor	v16.16b, v16.16b, v22.16b
1672	add	v10.4s, v10.4s, v5.4s
1673	add	v17.4s, v17.4s, v2.4s
1674	add	v20.4s, v20.4s, v1.4s
1675	tbl	v16.16b, { v16.16b }, v18.16b
1676	eor	v25.16b, v25.16b, v10.16b
1677	eor	v6.16b, v6.16b, v17.16b
1678	eor	v4.16b, v4.16b, v20.16b
1679	add	v21.4s, v21.4s, v16.4s
1680	tbl	v25.16b, { v25.16b }, v18.16b
1681	tbl	v6.16b, { v6.16b }, v18.16b
1682	tbl	v4.16b, { v4.16b }, v18.16b
1683	eor	v0.16b, v21.16b, v0.16b
1684	add	v19.4s, v19.4s, v25.4s
1685	add	v26.4s, v26.4s, v6.4s
1686	add	v7.4s, v7.4s, v4.4s
1687	ushr	v12.4s, v0.4s, #7
1688	shl	v0.4s, v0.4s, #25
1689	eor	v5.16b, v19.16b, v5.16b
1690	eor	v2.16b, v26.16b, v2.16b
1691	eor	v1.16b, v7.16b, v1.16b
1692	orr	v0.16b, v0.16b, v12.16b
1693	ushr	v12.4s, v5.4s, #7
1694	shl	v5.4s, v5.4s, #25
1695	add	v10.4s, v10.4s, v23.4s
1696	ushr	v13.4s, v2.4s, #7
1697	shl	v2.4s, v2.4s, #25
1698	ushr	v14.4s, v1.4s, #7
1699	shl	v1.4s, v1.4s, #25
1700	orr	v5.16b, v5.16b, v12.16b
1701	add	v10.4s, v10.4s, v0.4s
1702	add	v20.4s, v20.4s, v24.4s
1703	ldr	q24, [sp, #144]
1704	orr	v2.16b, v2.16b, v13.16b
1705	orr	v1.16b, v1.16b, v14.16b
1706	add	v22.4s, v22.4s, v9.4s
1707	add	v17.4s, v17.4s, v11.4s
1708	eor	v4.16b, v4.16b, v10.16b
1709	add	v20.4s, v20.4s, v5.4s
1710	add	v22.4s, v22.4s, v2.4s
1711	add	v17.4s, v17.4s, v1.4s
1712	tbl	v4.16b, { v4.16b }, v27.16b
1713	eor	v6.16b, v6.16b, v20.16b
1714	eor	v25.16b, v25.16b, v22.16b
1715	eor	v16.16b, v16.16b, v17.16b
1716	add	v26.4s, v26.4s, v4.4s
1717	tbl	v6.16b, { v6.16b }, v27.16b
1718	tbl	v25.16b, { v25.16b }, v27.16b
1719	tbl	v16.16b, { v16.16b }, v27.16b
1720	eor	v0.16b, v26.16b, v0.16b
1721	add	v21.4s, v21.4s, v6.4s
1722	add	v7.4s, v7.4s, v25.4s
1723	add	v19.4s, v19.4s, v16.4s
1724	ushr	v12.4s, v0.4s, #12
1725	shl	v0.4s, v0.4s, #20
1726	eor	v5.16b, v21.16b, v5.16b
1727	eor	v2.16b, v7.16b, v2.16b
1728	eor	v1.16b, v19.16b, v1.16b
1729	orr	v0.16b, v0.16b, v12.16b
1730	add	v10.4s, v10.4s, v15.4s
1731	ushr	v14.4s, v5.4s, #12
1732	shl	v5.4s, v5.4s, #20
1733	mov	v30.16b, v3.16b
1734	ldr	q3, [sp, #256]
1735	ushr	v12.4s, v2.4s, #12
1736	shl	v2.4s, v2.4s, #20
1737	ushr	v13.4s, v1.4s, #12
1738	shl	v1.4s, v1.4s, #20
1739	add	v10.4s, v10.4s, v0.4s
1740	orr	v5.16b, v5.16b, v14.16b
1741	add	v20.4s, v20.4s, v3.4s
1742	orr	v2.16b, v2.16b, v12.16b
1743	orr	v1.16b, v1.16b, v13.16b
1744	add	v22.4s, v22.4s, v24.4s
1745	add	v17.4s, v17.4s, v28.4s
1746	eor	v4.16b, v4.16b, v10.16b
1747	add	v20.4s, v20.4s, v5.4s
1748	add	v22.4s, v22.4s, v2.4s
1749	add	v17.4s, v17.4s, v1.4s
1750	tbl	v4.16b, { v4.16b }, v18.16b
1751	eor	v6.16b, v6.16b, v20.16b
1752	eor	v25.16b, v25.16b, v22.16b
1753	eor	v16.16b, v16.16b, v17.16b
1754	add	v26.4s, v26.4s, v4.4s
1755	tbl	v6.16b, { v6.16b }, v18.16b
1756	tbl	v25.16b, { v25.16b }, v18.16b
1757	tbl	v16.16b, { v16.16b }, v18.16b
1758	eor	v0.16b, v26.16b, v0.16b
1759	add	v21.4s, v21.4s, v6.4s
1760	add	v7.4s, v7.4s, v25.4s
1761	add	v19.4s, v19.4s, v16.4s
1762	ushr	v12.4s, v0.4s, #7
1763	shl	v0.4s, v0.4s, #25
1764	eor	v5.16b, v21.16b, v5.16b
1765	eor	v2.16b, v7.16b, v2.16b
1766	eor	v1.16b, v19.16b, v1.16b
1767	orr	v0.16b, v0.16b, v12.16b
1768	ushr	v12.4s, v5.4s, #7
1769	shl	v5.4s, v5.4s, #25
1770	mov	v23.16b, v9.16b
1771	ldr	q9, [sp, #112]
1772	ushr	v13.4s, v2.4s, #7
1773	shl	v2.4s, v2.4s, #25
1774	ushr	v14.4s, v1.4s, #7
1775	shl	v1.4s, v1.4s, #25
1776	orr	v5.16b, v5.16b, v12.16b
1777	add	v9.4s, v10.4s, v9.4s
1778	orr	v2.16b, v2.16b, v13.16b
1779	orr	v1.16b, v1.16b, v14.16b
1780	ldr	q14, [sp, #64]
1781	add	v22.4s, v22.4s, v31.4s
1782	add	v17.4s, v17.4s, v30.4s
1783	add	v20.4s, v20.4s, v8.4s
1784	add	v9.4s, v9.4s, v5.4s
1785	add	v22.4s, v22.4s, v0.4s
1786	add	v17.4s, v17.4s, v2.4s
1787	add	v20.4s, v20.4s, v1.4s
1788	eor	v25.16b, v25.16b, v9.16b
1789	eor	v16.16b, v16.16b, v22.16b
1790	eor	v6.16b, v6.16b, v17.16b
1791	eor	v4.16b, v4.16b, v20.16b
1792	tbl	v25.16b, { v25.16b }, v27.16b
1793	tbl	v16.16b, { v16.16b }, v27.16b
1794	tbl	v6.16b, { v6.16b }, v27.16b
1795	tbl	v4.16b, { v4.16b }, v27.16b
1796	add	v19.4s, v19.4s, v25.4s
1797	add	v21.4s, v21.4s, v16.4s
1798	add	v26.4s, v26.4s, v6.4s
1799	add	v7.4s, v7.4s, v4.4s
1800	eor	v5.16b, v5.16b, v19.16b
1801	eor	v0.16b, v21.16b, v0.16b
1802	eor	v2.16b, v26.16b, v2.16b
1803	eor	v1.16b, v7.16b, v1.16b
1804	ushr	v30.4s, v5.4s, #12
1805	shl	v5.4s, v5.4s, #20
1806	ushr	v10.4s, v0.4s, #12
1807	shl	v0.4s, v0.4s, #20
1808	ushr	v12.4s, v2.4s, #12
1809	shl	v2.4s, v2.4s, #20
1810	ushr	v13.4s, v1.4s, #12
1811	shl	v1.4s, v1.4s, #20
1812	orr	v5.16b, v5.16b, v30.16b
1813	add	v30.4s, v9.4s, v29.4s
1814	add	v22.4s, v22.4s, v23.4s
1815	ldr	q23, [sp, #192]
1816	orr	v0.16b, v0.16b, v10.16b
1817	orr	v2.16b, v2.16b, v12.16b
1818	orr	v1.16b, v1.16b, v13.16b
1819	add	v17.4s, v17.4s, v23.4s
1820	add	v20.4s, v20.4s, v28.4s
1821	add	v23.4s, v30.4s, v5.4s
1822	add	v22.4s, v22.4s, v0.4s
1823	add	v17.4s, v17.4s, v2.4s
1824	add	v20.4s, v20.4s, v1.4s
1825	eor	v25.16b, v25.16b, v23.16b
1826	eor	v16.16b, v16.16b, v22.16b
1827	eor	v6.16b, v6.16b, v17.16b
1828	eor	v4.16b, v4.16b, v20.16b
1829	tbl	v25.16b, { v25.16b }, v18.16b
1830	tbl	v16.16b, { v16.16b }, v18.16b
1831	tbl	v6.16b, { v6.16b }, v18.16b
1832	tbl	v4.16b, { v4.16b }, v18.16b
1833	add	v19.4s, v19.4s, v25.4s
1834	add	v21.4s, v21.4s, v16.4s
1835	add	v26.4s, v26.4s, v6.4s
1836	add	v7.4s, v7.4s, v4.4s
1837	eor	v5.16b, v19.16b, v5.16b
1838	eor	v0.16b, v21.16b, v0.16b
1839	eor	v2.16b, v26.16b, v2.16b
1840	eor	v1.16b, v7.16b, v1.16b
1841	ushr	v28.4s, v5.4s, #7
1842	shl	v5.4s, v5.4s, #25
1843	ushr	v30.4s, v0.4s, #7
1844	shl	v0.4s, v0.4s, #25
1845	ushr	v31.4s, v2.4s, #7
1846	shl	v2.4s, v2.4s, #25
1847	ushr	v8.4s, v1.4s, #7
1848	shl	v1.4s, v1.4s, #25
1849	orr	v5.16b, v5.16b, v28.16b
1850	ldr	q28, [sp, #176]
1851	orr	v0.16b, v0.16b, v30.16b
1852	orr	v2.16b, v2.16b, v31.16b
1853	orr	v1.16b, v1.16b, v8.16b
1854	add	v23.4s, v23.4s, v28.4s
1855	add	v22.4s, v22.4s, v11.4s
1856	add	v17.4s, v17.4s, v15.4s
1857	add	v20.4s, v20.4s, v3.4s
1858	ldr	q3, [sp, #272]
1859	add	v23.4s, v23.4s, v0.4s
1860	add	v22.4s, v22.4s, v2.4s
1861	add	v17.4s, v17.4s, v1.4s
1862	add	v20.4s, v20.4s, v5.4s
1863	eor	v4.16b, v4.16b, v23.16b
1864	eor	v25.16b, v25.16b, v22.16b
1865	eor	v16.16b, v16.16b, v17.16b
1866	eor	v6.16b, v6.16b, v20.16b
1867	tbl	v4.16b, { v4.16b }, v27.16b
1868	tbl	v25.16b, { v25.16b }, v27.16b
1869	tbl	v16.16b, { v16.16b }, v27.16b
1870	tbl	v6.16b, { v6.16b }, v27.16b
1871	add	v26.4s, v26.4s, v4.4s
1872	add	v7.4s, v7.4s, v25.4s
1873	add	v19.4s, v19.4s, v16.4s
1874	add	v21.4s, v21.4s, v6.4s
1875	eor	v0.16b, v26.16b, v0.16b
1876	eor	v2.16b, v7.16b, v2.16b
1877	eor	v1.16b, v19.16b, v1.16b
1878	eor	v5.16b, v21.16b, v5.16b
1879	add	v3.4s, v22.4s, v3.4s
1880	ldr	q22, [sp, #160]
1881	ushr	v28.4s, v0.4s, #12
1882	shl	v0.4s, v0.4s, #20
1883	ushr	v29.4s, v2.4s, #12
1884	shl	v2.4s, v2.4s, #20
1885	ushr	v30.4s, v1.4s, #12
1886	shl	v1.4s, v1.4s, #20
1887	ushr	v31.4s, v5.4s, #12
1888	shl	v5.4s, v5.4s, #20
1889	add	v17.4s, v17.4s, v22.4s
1890	ldr	q22, [sp, #240]
1891	orr	v0.16b, v0.16b, v28.16b
1892	prfm	pldl1keep, [x23, #256]
1893	orr	v2.16b, v2.16b, v29.16b
1894	prfm	pldl1keep, [x24, #256]
1895	orr	v1.16b, v1.16b, v30.16b
1896	prfm	pldl1keep, [x22, #256]
1897	orr	v5.16b, v5.16b, v31.16b
1898	prfm	pldl1keep, [x25, #256]
1899	add	v23.4s, v23.4s, v24.4s
1900	add	v20.4s, v20.4s, v22.4s
1901	add	v3.4s, v3.4s, v2.4s
1902	add	v17.4s, v17.4s, v1.4s
1903	add	v22.4s, v23.4s, v0.4s
1904	add	v20.4s, v20.4s, v5.4s
1905	eor	v23.16b, v25.16b, v3.16b
1906	eor	v16.16b, v16.16b, v17.16b
1907	eor	v4.16b, v4.16b, v22.16b
1908	eor	v6.16b, v6.16b, v20.16b
1909	tbl	v23.16b, { v23.16b }, v18.16b
1910	tbl	v16.16b, { v16.16b }, v18.16b
1911	tbl	v4.16b, { v4.16b }, v18.16b
1912	tbl	v6.16b, { v6.16b }, v18.16b
1913	add	v7.4s, v7.4s, v23.4s
1914	add	v19.4s, v19.4s, v16.4s
1915	add	v18.4s, v26.4s, v4.4s
1916	add	v21.4s, v21.4s, v6.4s
1917	eor	v2.16b, v7.16b, v2.16b
1918	eor	v1.16b, v19.16b, v1.16b
1919	eor	v0.16b, v18.16b, v0.16b
1920	eor	v5.16b, v21.16b, v5.16b
1921	ushr	v25.4s, v2.4s, #7
1922	shl	v2.4s, v2.4s, #25
1923	ushr	v24.4s, v0.4s, #7
1924	shl	v0.4s, v0.4s, #25
1925	ushr	v26.4s, v1.4s, #7
1926	shl	v1.4s, v1.4s, #25
1927	ushr	v27.4s, v5.4s, #7
1928	shl	v5.4s, v5.4s, #25
1929	orr	v0.16b, v0.16b, v24.16b
1930	orr	v2.16b, v2.16b, v25.16b
1931	orr	v1.16b, v1.16b, v26.16b
1932	orr	v5.16b, v5.16b, v27.16b
1933	movi	v13.4s, #64
1934	eor	v29.16b, v19.16b, v22.16b
1935	eor	v8.16b, v21.16b, v3.16b
1936	eor	v30.16b, v17.16b, v18.16b
1937	eor	v31.16b, v20.16b, v7.16b
1938	eor	v24.16b, v5.16b, v23.16b
1939	eor	v18.16b, v0.16b, v16.16b
1940	eor	v25.16b, v2.16b, v6.16b
1941	eor	v26.16b, v1.16b, v4.16b
1942	cbnz	x21, .LBB3_5
1943	b	.LBB3_2
1944.LBB3_6:
1945	cbz	x1, .LBB3_14
1946	adrp	x12, .LCPI3_3
1947	ldr	q0, [x11, :lo12:.LCPI3_1]
1948	orr	w11, w7, w6
1949	ldr	q2, [x10, :lo12:.LCPI3_2]
1950	ldr	q1, [x12, :lo12:.LCPI3_3]
1951	and	x12, x5, #0x1
1952.LBB3_8:
1953	movi	v3.4s, #64
1954	lsr	x13, x4, #32
1955	ldp	q5, q4, [x3]
1956	mov	x15, x2
1957	mov	w14, w11
1958	mov	v3.s[0], w4
1959	ldr	x10, [x0]
1960	mov	v3.s[1], w13
1961	b	.LBB3_11
1962.LBB3_9:
1963	orr	w14, w14, w9
1964.LBB3_10:
1965	ldp	q6, q7, [x10]
1966	mov	v16.16b, v3.16b
1967	and	w14, w14, #0xff
1968	add	v5.4s, v5.4s, v4.4s
1969	mov	x15, x13
1970	mov	v16.s[3], w14
1971	add	x14, x10, #32
1972	uzp1	v17.4s, v6.4s, v7.4s
1973	add	x10, x10, #64
1974	add	v5.4s, v5.4s, v17.4s
1975	eor	v16.16b, v5.16b, v16.16b
1976	tbl	v16.16b, { v16.16b }, v0.16b
1977	add	v18.4s, v16.4s, v1.4s
1978	eor	v19.16b, v18.16b, v4.16b
1979	uzp2	v4.4s, v6.4s, v7.4s
1980	ushr	v6.4s, v19.4s, #12
1981	shl	v7.4s, v19.4s, #20
1982	ld2	{ v19.4s, v20.4s }, [x14]
1983	add	v5.4s, v5.4s, v4.4s
1984	mov	w14, w6
1985	orr	v6.16b, v7.16b, v6.16b
1986	add	v5.4s, v5.4s, v6.4s
1987	eor	v7.16b, v16.16b, v5.16b
1988	add	v5.4s, v5.4s, v19.4s
1989	tbl	v7.16b, { v7.16b }, v2.16b
1990	ext	v5.16b, v5.16b, v5.16b, #12
1991	add	v16.4s, v18.4s, v7.4s
1992	ext	v7.16b, v7.16b, v7.16b, #8
1993	eor	v6.16b, v6.16b, v16.16b
1994	ext	v16.16b, v16.16b, v16.16b, #4
1995	ushr	v18.4s, v6.4s, #7
1996	shl	v6.4s, v6.4s, #25
1997	orr	v6.16b, v6.16b, v18.16b
1998	ext	v18.16b, v20.16b, v20.16b, #12
1999	add	v5.4s, v5.4s, v6.4s
2000	eor	v7.16b, v5.16b, v7.16b
2001	add	v5.4s, v5.4s, v18.4s
2002	tbl	v7.16b, { v7.16b }, v0.16b
2003	add	v16.4s, v16.4s, v7.4s
2004	eor	v6.16b, v6.16b, v16.16b
2005	ushr	v21.4s, v6.4s, #12
2006	shl	v6.4s, v6.4s, #20
2007	orr	v6.16b, v6.16b, v21.16b
2008	uzp1	v21.4s, v17.4s, v17.4s
2009	add	v5.4s, v5.4s, v6.4s
2010	ext	v21.16b, v21.16b, v17.16b, #8
2011	eor	v7.16b, v7.16b, v5.16b
2012	uzp2	v21.4s, v21.4s, v4.4s
2013	tbl	v7.16b, { v7.16b }, v2.16b
2014	add	v5.4s, v5.4s, v21.4s
2015	add	v16.4s, v16.4s, v7.4s
2016	ext	v5.16b, v5.16b, v5.16b, #4
2017	ext	v7.16b, v7.16b, v7.16b, #8
2018	eor	v6.16b, v6.16b, v16.16b
2019	ushr	v22.4s, v6.4s, #7
2020	shl	v6.4s, v6.4s, #25
2021	orr	v6.16b, v6.16b, v22.16b
2022	add	v22.4s, v5.4s, v6.4s
2023	eor	v5.16b, v22.16b, v7.16b
2024	ext	v7.16b, v16.16b, v16.16b, #12
2025	tbl	v16.16b, { v5.16b }, v0.16b
2026	ext	v5.16b, v17.16b, v17.16b, #12
2027	add	v7.4s, v7.4s, v16.4s
2028	ext	v5.16b, v17.16b, v5.16b, #12
2029	ext	v17.16b, v19.16b, v19.16b, #12
2030	mov	v19.16b, v18.16b
2031	eor	v6.16b, v6.16b, v7.16b
2032	rev64	v5.4s, v5.4s
2033	mov	v19.s[1], v17.s[2]
2034	ushr	v20.4s, v6.4s, #12
2035	shl	v6.4s, v6.4s, #20
2036	trn2	v5.4s, v5.4s, v19.4s
2037	orr	v6.16b, v6.16b, v20.16b
2038	zip1	v20.2d, v18.2d, v4.2d
2039	zip2	v4.4s, v4.4s, v18.4s
2040	add	v19.4s, v6.4s, v5.4s
2041	mov	v20.s[3], v17.s[3]
2042	add	v19.4s, v19.4s, v22.4s
2043	ext	v22.16b, v20.16b, v20.16b, #12
2044	eor	v16.16b, v16.16b, v19.16b
2045	ext	v19.16b, v19.16b, v19.16b, #12
2046	tbl	v16.16b, { v16.16b }, v2.16b
2047	add	v7.4s, v7.4s, v16.4s
2048	ext	v16.16b, v16.16b, v16.16b, #8
2049	eor	v6.16b, v6.16b, v7.16b
2050	ext	v7.16b, v7.16b, v7.16b, #4
2051	ushr	v23.4s, v6.4s, #7
2052	shl	v24.4s, v6.4s, #25
2053	uzp1	v6.4s, v20.4s, v22.4s
2054	orr	v20.16b, v24.16b, v23.16b
2055	add	v22.4s, v20.4s, v6.4s
2056	add	v19.4s, v22.4s, v19.4s
2057	eor	v16.16b, v19.16b, v16.16b
2058	tbl	v16.16b, { v16.16b }, v0.16b
2059	add	v7.4s, v7.4s, v16.4s
2060	eor	v18.16b, v20.16b, v7.16b
2061	zip1	v20.4s, v4.4s, v17.4s
2062	zip1	v4.4s, v17.4s, v4.4s
2063	ushr	v17.4s, v18.4s, #12
2064	shl	v18.4s, v18.4s, #20
2065	ext	v20.16b, v4.16b, v20.16b, #8
2066	orr	v4.16b, v18.16b, v17.16b
2067	ext	v18.16b, v21.16b, v21.16b, #4
2068	add	v17.4s, v4.4s, v20.4s
2069	add	v17.4s, v17.4s, v19.4s
2070	uzp1	v19.4s, v18.4s, v18.4s
2071	eor	v16.16b, v16.16b, v17.16b
2072	ext	v19.16b, v19.16b, v18.16b, #8
2073	tbl	v16.16b, { v16.16b }, v2.16b
2074	uzp2	v19.4s, v19.4s, v5.4s
2075	add	v7.4s, v7.4s, v16.4s
2076	add	v17.4s, v17.4s, v19.4s
2077	ext	v16.16b, v16.16b, v16.16b, #8
2078	eor	v4.16b, v4.16b, v7.16b
2079	ext	v17.16b, v17.16b, v17.16b, #4
2080	ext	v7.16b, v7.16b, v7.16b, #12
2081	ushr	v21.4s, v4.4s, #7
2082	shl	v4.4s, v4.4s, #25
2083	orr	v4.16b, v4.16b, v21.16b
2084	ext	v21.16b, v18.16b, v18.16b, #12
2085	add	v17.4s, v17.4s, v4.4s
2086	ext	v18.16b, v18.16b, v21.16b, #12
2087	mov	v21.16b, v20.16b
2088	eor	v16.16b, v17.16b, v16.16b
2089	rev64	v18.4s, v18.4s
2090	mov	v21.s[1], v6.s[2]
2091	tbl	v16.16b, { v16.16b }, v0.16b
2092	add	v7.4s, v7.4s, v16.4s
2093	eor	v4.16b, v4.16b, v7.16b
2094	ushr	v22.4s, v4.4s, #12
2095	shl	v23.4s, v4.4s, #20
2096	trn2	v4.4s, v18.4s, v21.4s
2097	orr	v18.16b, v23.16b, v22.16b
2098	add	v21.4s, v18.4s, v4.4s
2099	add	v17.4s, v21.4s, v17.4s
2100	zip1	v21.2d, v20.2d, v5.2d
2101	zip2	v5.4s, v5.4s, v20.4s
2102	eor	v16.16b, v16.16b, v17.16b
2103	mov	v21.s[3], v6.s[3]
2104	ext	v17.16b, v17.16b, v17.16b, #12
2105	zip1	v20.4s, v5.4s, v6.4s
2106	tbl	v16.16b, { v16.16b }, v2.16b
2107	zip1	v5.4s, v6.4s, v5.4s
2108	add	v22.4s, v7.4s, v16.4s
2109	ext	v16.16b, v16.16b, v16.16b, #8
2110	ext	v20.16b, v5.16b, v20.16b, #8
2111	eor	v7.16b, v18.16b, v22.16b
2112	ext	v18.16b, v21.16b, v21.16b, #12
2113	ushr	v23.4s, v7.4s, #7
2114	shl	v24.4s, v7.4s, #25
2115	uzp1	v7.4s, v21.4s, v18.4s
2116	orr	v18.16b, v24.16b, v23.16b
2117	add	v21.4s, v18.4s, v7.4s
2118	add	v17.4s, v21.4s, v17.4s
2119	ext	v21.16b, v22.16b, v22.16b, #4
2120	eor	v16.16b, v17.16b, v16.16b
2121	tbl	v16.16b, { v16.16b }, v0.16b
2122	add	v21.4s, v21.4s, v16.4s
2123	eor	v18.16b, v18.16b, v21.16b
2124	ushr	v6.4s, v18.4s, #12
2125	shl	v18.4s, v18.4s, #20
2126	orr	v5.16b, v18.16b, v6.16b
2127	add	v6.4s, v5.4s, v20.4s
2128	add	v6.4s, v6.4s, v17.4s
2129	ext	v17.16b, v19.16b, v19.16b, #4
2130	eor	v16.16b, v16.16b, v6.16b
2131	uzp1	v18.4s, v17.4s, v17.4s
2132	tbl	v16.16b, { v16.16b }, v2.16b
2133	ext	v18.16b, v18.16b, v17.16b, #8
2134	add	v19.4s, v21.4s, v16.4s
2135	uzp2	v18.4s, v18.4s, v4.4s
2136	ext	v16.16b, v16.16b, v16.16b, #8
2137	eor	v5.16b, v5.16b, v19.16b
2138	add	v6.4s, v6.4s, v18.4s
2139	ext	v19.16b, v19.16b, v19.16b, #12
2140	ushr	v21.4s, v5.4s, #7
2141	shl	v5.4s, v5.4s, #25
2142	ext	v6.16b, v6.16b, v6.16b, #4
2143	orr	v5.16b, v5.16b, v21.16b
2144	ext	v21.16b, v17.16b, v17.16b, #12
2145	add	v6.4s, v6.4s, v5.4s
2146	ext	v17.16b, v17.16b, v21.16b, #12
2147	mov	v21.16b, v20.16b
2148	eor	v16.16b, v6.16b, v16.16b
2149	rev64	v17.4s, v17.4s
2150	mov	v21.s[1], v7.s[2]
2151	tbl	v16.16b, { v16.16b }, v0.16b
2152	add	v19.4s, v19.4s, v16.4s
2153	eor	v5.16b, v5.16b, v19.16b
2154	ushr	v22.4s, v5.4s, #12
2155	shl	v23.4s, v5.4s, #20
2156	trn2	v5.4s, v17.4s, v21.4s
2157	orr	v17.16b, v23.16b, v22.16b
2158	add	v21.4s, v17.4s, v5.4s
2159	add	v6.4s, v21.4s, v6.4s
2160	eor	v16.16b, v16.16b, v6.16b
2161	ext	v6.16b, v6.16b, v6.16b, #12
2162	tbl	v21.16b, { v16.16b }, v2.16b
2163	zip1	v16.2d, v20.2d, v4.2d
2164	zip2	v4.4s, v4.4s, v20.4s
2165	add	v19.4s, v19.4s, v21.4s
2166	mov	v16.s[3], v7.s[3]
2167	ext	v21.16b, v21.16b, v21.16b, #8
2168	zip1	v20.4s, v4.4s, v7.4s
2169	eor	v17.16b, v17.16b, v19.16b
2170	ext	v22.16b, v16.16b, v16.16b, #12
2171	ext	v19.16b, v19.16b, v19.16b, #4
2172	zip1	v4.4s, v7.4s, v4.4s
2173	ushr	v23.4s, v17.4s, #7
2174	shl	v17.4s, v17.4s, #25
2175	uzp1	v16.4s, v16.4s, v22.4s
2176	ext	v4.16b, v4.16b, v20.16b, #8
2177	orr	v17.16b, v17.16b, v23.16b
2178	add	v22.4s, v17.4s, v16.4s
2179	add	v6.4s, v22.4s, v6.4s
2180	eor	v21.16b, v6.16b, v21.16b
2181	tbl	v21.16b, { v21.16b }, v0.16b
2182	add	v19.4s, v19.4s, v21.4s
2183	eor	v17.16b, v17.16b, v19.16b
2184	ushr	v7.4s, v17.4s, #12
2185	shl	v17.4s, v17.4s, #20
2186	orr	v7.16b, v17.16b, v7.16b
2187	add	v17.4s, v7.4s, v4.4s
2188	add	v6.4s, v17.4s, v6.4s
2189	ext	v17.16b, v18.16b, v18.16b, #4
2190	eor	v18.16b, v21.16b, v6.16b
2191	uzp1	v20.4s, v17.4s, v17.4s
2192	tbl	v18.16b, { v18.16b }, v2.16b
2193	ext	v20.16b, v20.16b, v17.16b, #8
2194	add	v19.4s, v19.4s, v18.4s
2195	uzp2	v20.4s, v20.4s, v5.4s
2196	ext	v18.16b, v18.16b, v18.16b, #8
2197	eor	v7.16b, v7.16b, v19.16b
2198	add	v6.4s, v6.4s, v20.4s
2199	ushr	v21.4s, v7.4s, #7
2200	shl	v7.4s, v7.4s, #25
2201	ext	v6.16b, v6.16b, v6.16b, #4
2202	orr	v7.16b, v7.16b, v21.16b
2203	add	v21.4s, v6.4s, v7.4s
2204	eor	v6.16b, v21.16b, v18.16b
2205	ext	v18.16b, v19.16b, v19.16b, #12
2206	tbl	v19.16b, { v6.16b }, v0.16b
2207	ext	v6.16b, v17.16b, v17.16b, #12
2208	add	v18.4s, v18.4s, v19.4s
2209	ext	v6.16b, v17.16b, v6.16b, #12
2210	mov	v17.16b, v4.16b
2211	eor	v7.16b, v7.16b, v18.16b
2212	rev64	v6.4s, v6.4s
2213	mov	v17.s[1], v16.s[2]
2214	ushr	v22.4s, v7.4s, #12
2215	shl	v7.4s, v7.4s, #20
2216	trn2	v6.4s, v6.4s, v17.4s
2217	orr	v7.16b, v7.16b, v22.16b
2218	add	v17.4s, v7.4s, v6.4s
2219	add	v17.4s, v17.4s, v21.4s
2220	zip1	v21.2d, v4.2d, v5.2d
2221	zip2	v4.4s, v5.4s, v4.4s
2222	eor	v19.16b, v19.16b, v17.16b
2223	mov	v21.s[3], v16.s[3]
2224	ext	v17.16b, v17.16b, v17.16b, #12
2225	tbl	v19.16b, { v19.16b }, v2.16b
2226	ext	v22.16b, v21.16b, v21.16b, #12
2227	add	v18.4s, v18.4s, v19.4s
2228	ext	v19.16b, v19.16b, v19.16b, #8
2229	eor	v7.16b, v7.16b, v18.16b
2230	ext	v18.16b, v18.16b, v18.16b, #4
2231	ushr	v23.4s, v7.4s, #7
2232	shl	v24.4s, v7.4s, #25
2233	uzp1	v7.4s, v21.4s, v22.4s
2234	orr	v21.16b, v24.16b, v23.16b
2235	add	v22.4s, v21.4s, v7.4s
2236	add	v17.4s, v22.4s, v17.4s
2237	eor	v19.16b, v17.16b, v19.16b
2238	tbl	v19.16b, { v19.16b }, v0.16b
2239	add	v18.4s, v18.4s, v19.4s
2240	eor	v5.16b, v21.16b, v18.16b
2241	zip1	v21.4s, v4.4s, v16.4s
2242	zip1	v4.4s, v16.4s, v4.4s
2243	ushr	v16.4s, v5.4s, #12
2244	shl	v5.4s, v5.4s, #20
2245	ext	v21.16b, v4.16b, v21.16b, #8
2246	orr	v4.16b, v5.16b, v16.16b
2247	ext	v16.16b, v20.16b, v20.16b, #4
2248	mov	v23.16b, v21.16b
2249	add	v5.4s, v4.4s, v21.4s
2250	mov	v23.s[1], v7.s[2]
2251	add	v5.4s, v5.4s, v17.4s
2252	eor	v17.16b, v19.16b, v5.16b
2253	uzp1	v19.4s, v16.4s, v16.4s
2254	tbl	v17.16b, { v17.16b }, v2.16b
2255	ext	v19.16b, v19.16b, v16.16b, #8
2256	add	v18.4s, v18.4s, v17.4s
2257	uzp2	v19.4s, v19.4s, v6.4s
2258	eor	v4.16b, v4.16b, v18.16b
2259	add	v5.4s, v5.4s, v19.4s
2260	ext	v19.16b, v19.16b, v19.16b, #4
2261	ushr	v20.4s, v4.4s, #7
2262	shl	v4.4s, v4.4s, #25
2263	ext	v5.16b, v5.16b, v5.16b, #4
2264	orr	v20.16b, v4.16b, v20.16b
2265	ext	v4.16b, v17.16b, v17.16b, #8
2266	add	v17.4s, v5.4s, v20.4s
2267	ext	v5.16b, v18.16b, v18.16b, #12
2268	eor	v4.16b, v17.16b, v4.16b
2269	tbl	v18.16b, { v4.16b }, v0.16b
2270	ext	v4.16b, v16.16b, v16.16b, #12
2271	add	v22.4s, v5.4s, v18.4s
2272	ext	v4.16b, v16.16b, v4.16b, #12
2273	eor	v5.16b, v20.16b, v22.16b
2274	rev64	v16.4s, v4.4s
2275	ushr	v20.4s, v5.4s, #12
2276	shl	v24.4s, v5.4s, #20
2277	trn2	v5.4s, v16.4s, v23.4s
2278	orr	v16.16b, v24.16b, v20.16b
2279	add	v20.4s, v16.4s, v5.4s
2280	add	v17.4s, v20.4s, v17.4s
2281	zip1	v20.2d, v21.2d, v6.2d
2282	zip2	v6.4s, v6.4s, v21.4s
2283	eor	v18.16b, v18.16b, v17.16b
2284	mov	v20.s[3], v7.s[3]
2285	ext	v17.16b, v17.16b, v17.16b, #12
2286	zip1	v21.4s, v6.4s, v7.4s
2287	tbl	v18.16b, { v18.16b }, v2.16b
2288	ext	v24.16b, v20.16b, v20.16b, #12
2289	zip1	v6.4s, v7.4s, v6.4s
2290	add	v22.4s, v22.4s, v18.4s
2291	ext	v18.16b, v18.16b, v18.16b, #8
2292	ext	v6.16b, v6.16b, v21.16b, #8
2293	eor	v16.16b, v16.16b, v22.16b
2294	ext	v22.16b, v22.16b, v22.16b, #4
2295	zip1	v5.2d, v6.2d, v5.2d
2296	zip2	v4.4s, v4.4s, v6.4s
2297	ushr	v25.4s, v16.4s, #7
2298	shl	v26.4s, v16.4s, #25
2299	uzp1	v16.4s, v20.4s, v24.4s
2300	orr	v20.16b, v26.16b, v25.16b
2301	mov	v5.s[3], v16.s[3]
2302	add	v24.4s, v20.4s, v16.4s
2303	add	v17.4s, v24.4s, v17.4s
2304	eor	v18.16b, v17.16b, v18.16b
2305	tbl	v18.16b, { v18.16b }, v0.16b
2306	add	v22.4s, v22.4s, v18.4s
2307	eor	v20.16b, v20.16b, v22.16b
2308	ushr	v7.4s, v20.4s, #12
2309	shl	v20.4s, v20.4s, #20
2310	orr	v7.16b, v20.16b, v7.16b
2311	add	v20.4s, v7.4s, v6.4s
2312	add	v17.4s, v20.4s, v17.4s
2313	ext	v20.16b, v19.16b, v19.16b, #8
2314	eor	v18.16b, v18.16b, v17.16b
2315	ext	v17.16b, v17.16b, v17.16b, #4
2316	tbl	v18.16b, { v18.16b }, v2.16b
2317	add	v21.4s, v22.4s, v18.4s
2318	uzp2	v22.4s, v20.4s, v23.4s
2319	ext	v18.16b, v18.16b, v18.16b, #8
2320	eor	v7.16b, v7.16b, v21.16b
2321	ext	v20.16b, v22.16b, v20.16b, #4
2322	ushr	v22.4s, v7.4s, #7
2323	shl	v7.4s, v7.4s, #25
2324	add	v17.4s, v17.4s, v20.4s
2325	ext	v20.16b, v21.16b, v21.16b, #12
2326	ext	v21.16b, v19.16b, v19.16b, #12
2327	orr	v7.16b, v7.16b, v22.16b
2328	ext	v19.16b, v19.16b, v21.16b, #12
2329	add	v17.4s, v17.4s, v7.4s
2330	mov	v21.16b, v6.16b
2331	rev64	v19.4s, v19.4s
2332	eor	v18.16b, v17.16b, v18.16b
2333	mov	v21.s[1], v16.s[2]
2334	tbl	v18.16b, { v18.16b }, v0.16b
2335	trn2	v19.4s, v19.4s, v21.4s
2336	add	v20.4s, v20.4s, v18.4s
2337	eor	v7.16b, v7.16b, v20.16b
2338	ushr	v22.4s, v7.4s, #12
2339	shl	v7.4s, v7.4s, #20
2340	orr	v7.16b, v7.16b, v22.16b
2341	add	v19.4s, v7.4s, v19.4s
2342	add	v17.4s, v19.4s, v17.4s
2343	eor	v18.16b, v18.16b, v17.16b
2344	ext	v17.16b, v17.16b, v17.16b, #12
2345	tbl	v18.16b, { v18.16b }, v2.16b
2346	add	v19.4s, v20.4s, v18.4s
2347	ext	v20.16b, v5.16b, v5.16b, #12
2348	ext	v18.16b, v18.16b, v18.16b, #8
2349	eor	v7.16b, v7.16b, v19.16b
2350	uzp1	v5.4s, v5.4s, v20.4s
2351	ushr	v21.4s, v7.4s, #7
2352	shl	v7.4s, v7.4s, #25
2353	orr	v7.16b, v7.16b, v21.16b
2354	add	v5.4s, v7.4s, v5.4s
2355	add	v5.4s, v5.4s, v17.4s
2356	eor	v17.16b, v5.16b, v18.16b
2357	ext	v18.16b, v19.16b, v19.16b, #4
2358	tbl	v17.16b, { v17.16b }, v0.16b
2359	add	v18.4s, v18.4s, v17.4s
2360	eor	v6.16b, v7.16b, v18.16b
2361	zip1	v7.4s, v4.4s, v16.4s
2362	zip1	v4.4s, v16.4s, v4.4s
2363	ushr	v16.4s, v6.4s, #12
2364	shl	v6.4s, v6.4s, #20
2365	ext	v4.16b, v4.16b, v7.16b, #8
2366	orr	v6.16b, v6.16b, v16.16b
2367	add	v4.4s, v6.4s, v4.4s
2368	add	v4.4s, v4.4s, v5.4s
2369	eor	v5.16b, v17.16b, v4.16b
2370	ext	v4.16b, v4.16b, v4.16b, #4
2371	tbl	v5.16b, { v5.16b }, v2.16b
2372	add	v7.4s, v18.4s, v5.4s
2373	eor	v6.16b, v6.16b, v7.16b
2374	ext	v7.16b, v7.16b, v7.16b, #12
2375	ushr	v16.4s, v6.4s, #7
2376	shl	v6.4s, v6.4s, #25
2377	orr	v6.16b, v6.16b, v16.16b
2378	ext	v16.16b, v5.16b, v5.16b, #8
2379	eor	v5.16b, v4.16b, v7.16b
2380	eor	v4.16b, v6.16b, v16.16b
2381.LBB3_11:
2382	subs	x13, x15, #1
2383	b.eq	.LBB3_9
2384	cbnz	x15, .LBB3_10
2385	add	x4, x4, x12
2386	add	x0, x0, #8
2387	subs	x1, x1, #1
2388	stp	q5, q4, [x8], #32
2389	b.ne	.LBB3_8
2390.LBB3_14:
2391	add	sp, sp, #368
2392	ldp	x20, x19, [sp, #128]
2393	ldp	x22, x21, [sp, #112]
2394	ldp	x24, x23, [sp, #96]
2395	ldp	x26, x25, [sp, #80]
2396	ldp	x29, x27, [sp, #64]
2397	ldp	d9, d8, [sp, #48]
2398	ldp	d11, d10, [sp, #32]
2399	ldp	d13, d12, [sp, #16]
2400	ldp	d15, d14, [sp], #144
2401	ret
2402.Lfunc_end3:
2403	.size	zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
2404	.cfi_endproc
2405	.section	".note.GNU-stack","",@progbits
2406#endif
2407