sha256-armv4.S revision 326663
1/* $FreeBSD: stable/11/secure/lib/libcrypto/arm/sha256-armv4.S 326663 2017-12-07 18:04:48Z jkim $ */
2/* Do not modify. This file is auto-generated from sha256-armv4.pl. */
3
4@ ====================================================================
5@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
6@ project. The module is, however, dual licensed under OpenSSL and
7@ CRYPTOGAMS licenses depending on where you obtain it. For further
8@ details see http://www.openssl.org/~appro/cryptogams/.
9@
10@ Permission to use under GPL terms is granted.
11@ ====================================================================
12
13@ SHA256 block procedure for ARMv4. May 2007.
14
15@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
16@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
17@ byte [on single-issue Xscale PXA250 core].
18
19@ July 2010.
20@
21@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
22@ Cortex A8 core and ~20 cycles per processed byte.
23
24@ February 2011.
25@
26@ Profiler-assisted and platform-specific optimization resulted in 16%
27@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
28
29@ September 2013.
30@
31@ Add NEON implementation. On Cortex A8 it was measured to process one
32@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
33@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
34@ code (meaning that latter performs sub-optimally, nothing was done
35@ about it).
36
37@ May 2014.
38@
39@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
40
41#ifndef __KERNEL__
42# include "arm_arch.h"
43#else
44# define __ARM_ARCH__ __LINUX_ARM_ARCH__
45# define __ARM_MAX_ARCH__ 7
46#endif
47
48.text
49#if __ARM_ARCH__<7
50.code	32
51#else
52.syntax unified
53# ifdef __thumb2__
54.thumb
55# else
56.code   32
57# endif
58#endif
59
60.type	K256,%object
61.align	5
62K256:
@ SHA-256 round constants K[0..63] (FIPS 180-4, sect. 4.2.2): the first
@ 32 bits of the fractional parts of the cube roots of the first 64
@ primes, stored in round order and consumed sequentially via r14.
63.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
64.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
65.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
66.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
67.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
68.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
69.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
70.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
71.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
72.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
73.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
74.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
75.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
76.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
77.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
78.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
79.size	K256,.-K256
@ Zero word past the table: the round loop detects completion by testing
@ the low byte of the value just fetched from [r14] (last real constant
@ ends in 0xf2 — see the "cmp ...,#0xf2 @ done?" checks), so the table
@ is followed by a word that can never match.
80.word	0				@ terminator
81#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Link-time offset from sha256_block_data_order to OPENSSL_armcap_P.
@ The entry code adds this to the runtime address it computed in r3,
@ giving a position-independent load of the CPU capability flags used
@ to dispatch to the NEON or ARMv8-crypto code paths.
82.LOPENSSL_armcap:
83.word	OPENSSL_armcap_P-sha256_block_data_order
84#endif
85.align	5
86
87.global	sha256_block_data_order
88.type	sha256_block_data_order,%function
89sha256_block_data_order:
90#if __ARM_ARCH__<7
91	sub	r3,pc,#8		@ sha256_block_data_order
92#else
93	adr	r3,.
94#endif
95#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
96	ldr	r12,.LOPENSSL_armcap
97	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
98	tst	r12,#ARMV8_SHA256
99	bne	.LARMv8
100	tst	r12,#ARMV7_NEON
101	bne	.LNEON
102#endif
103	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
104	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
105	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
106	sub	r14,r3,#256+32	@ K256
107	sub	sp,sp,#16*4		@ alloca(X[16])
108.Loop:
109# if __ARM_ARCH__>=7
110	ldr	r2,[r1],#4
111# else
112	ldrb	r2,[r1,#3]
113# endif
114	eor	r3,r5,r6		@ magic
115	eor	r12,r12,r12
116#if __ARM_ARCH__>=7
117	@ ldr	r2,[r1],#4			@ 0
118# if 0==15
119	str	r1,[sp,#17*4]			@ make room for r1
120# endif
121	eor	r0,r8,r8,ror#5
122	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
123	eor	r0,r0,r8,ror#19	@ Sigma1(e)
124	rev	r2,r2
125#else
126	@ ldrb	r2,[r1,#3]			@ 0
127	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
128	ldrb	r12,[r1,#2]
129	ldrb	r0,[r1,#1]
130	orr	r2,r2,r12,lsl#8
131	ldrb	r12,[r1],#4
132	orr	r2,r2,r0,lsl#16
133# if 0==15
134	str	r1,[sp,#17*4]			@ make room for r1
135# endif
136	eor	r0,r8,r8,ror#5
137	orr	r2,r2,r12,lsl#24
138	eor	r0,r0,r8,ror#19	@ Sigma1(e)
139#endif
140	ldr	r12,[r14],#4			@ *K256++
141	add	r11,r11,r2			@ h+=X[i]
142	str	r2,[sp,#0*4]
143	eor	r2,r9,r10
144	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
145	and	r2,r2,r8
146	add	r11,r11,r12			@ h+=K256[i]
147	eor	r2,r2,r10			@ Ch(e,f,g)
148	eor	r0,r4,r4,ror#11
149	add	r11,r11,r2			@ h+=Ch(e,f,g)
150#if 0==31
151	and	r12,r12,#0xff
152	cmp	r12,#0xf2			@ done?
153#endif
154#if 0<15
155# if __ARM_ARCH__>=7
156	ldr	r2,[r1],#4			@ prefetch
157# else
158	ldrb	r2,[r1,#3]
159# endif
160	eor	r12,r4,r5			@ a^b, b^c in next round
161#else
162	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
163	eor	r12,r4,r5			@ a^b, b^c in next round
164	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
165#endif
166	eor	r0,r0,r4,ror#20	@ Sigma0(a)
167	and	r3,r3,r12			@ (b^c)&=(a^b)
168	add	r7,r7,r11			@ d+=h
169	eor	r3,r3,r5			@ Maj(a,b,c)
170	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
171	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
172#if __ARM_ARCH__>=7
173	@ ldr	r2,[r1],#4			@ 1
174# if 1==15
175	str	r1,[sp,#17*4]			@ make room for r1
176# endif
177	eor	r0,r7,r7,ror#5
178	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
179	eor	r0,r0,r7,ror#19	@ Sigma1(e)
180	rev	r2,r2
181#else
182	@ ldrb	r2,[r1,#3]			@ 1
183	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
184	ldrb	r3,[r1,#2]
185	ldrb	r0,[r1,#1]
186	orr	r2,r2,r3,lsl#8
187	ldrb	r3,[r1],#4
188	orr	r2,r2,r0,lsl#16
189# if 1==15
190	str	r1,[sp,#17*4]			@ make room for r1
191# endif
192	eor	r0,r7,r7,ror#5
193	orr	r2,r2,r3,lsl#24
194	eor	r0,r0,r7,ror#19	@ Sigma1(e)
195#endif
196	ldr	r3,[r14],#4			@ *K256++
197	add	r10,r10,r2			@ h+=X[i]
198	str	r2,[sp,#1*4]
199	eor	r2,r8,r9
200	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
201	and	r2,r2,r7
202	add	r10,r10,r3			@ h+=K256[i]
203	eor	r2,r2,r9			@ Ch(e,f,g)
204	eor	r0,r11,r11,ror#11
205	add	r10,r10,r2			@ h+=Ch(e,f,g)
206#if 1==31
207	and	r3,r3,#0xff
208	cmp	r3,#0xf2			@ done?
209#endif
210#if 1<15
211# if __ARM_ARCH__>=7
212	ldr	r2,[r1],#4			@ prefetch
213# else
214	ldrb	r2,[r1,#3]
215# endif
216	eor	r3,r11,r4			@ a^b, b^c in next round
217#else
218	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
219	eor	r3,r11,r4			@ a^b, b^c in next round
220	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
221#endif
222	eor	r0,r0,r11,ror#20	@ Sigma0(a)
223	and	r12,r12,r3			@ (b^c)&=(a^b)
224	add	r6,r6,r10			@ d+=h
225	eor	r12,r12,r4			@ Maj(a,b,c)
226	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
227	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
228#if __ARM_ARCH__>=7
229	@ ldr	r2,[r1],#4			@ 2
230# if 2==15
231	str	r1,[sp,#17*4]			@ make room for r1
232# endif
233	eor	r0,r6,r6,ror#5
234	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
235	eor	r0,r0,r6,ror#19	@ Sigma1(e)
236	rev	r2,r2
237#else
238	@ ldrb	r2,[r1,#3]			@ 2
239	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
240	ldrb	r12,[r1,#2]
241	ldrb	r0,[r1,#1]
242	orr	r2,r2,r12,lsl#8
243	ldrb	r12,[r1],#4
244	orr	r2,r2,r0,lsl#16
245# if 2==15
246	str	r1,[sp,#17*4]			@ make room for r1
247# endif
248	eor	r0,r6,r6,ror#5
249	orr	r2,r2,r12,lsl#24
250	eor	r0,r0,r6,ror#19	@ Sigma1(e)
251#endif
252	ldr	r12,[r14],#4			@ *K256++
253	add	r9,r9,r2			@ h+=X[i]
254	str	r2,[sp,#2*4]
255	eor	r2,r7,r8
256	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
257	and	r2,r2,r6
258	add	r9,r9,r12			@ h+=K256[i]
259	eor	r2,r2,r8			@ Ch(e,f,g)
260	eor	r0,r10,r10,ror#11
261	add	r9,r9,r2			@ h+=Ch(e,f,g)
262#if 2==31
263	and	r12,r12,#0xff
264	cmp	r12,#0xf2			@ done?
265#endif
266#if 2<15
267# if __ARM_ARCH__>=7
268	ldr	r2,[r1],#4			@ prefetch
269# else
270	ldrb	r2,[r1,#3]
271# endif
272	eor	r12,r10,r11			@ a^b, b^c in next round
273#else
274	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
275	eor	r12,r10,r11			@ a^b, b^c in next round
276	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
277#endif
278	eor	r0,r0,r10,ror#20	@ Sigma0(a)
279	and	r3,r3,r12			@ (b^c)&=(a^b)
280	add	r5,r5,r9			@ d+=h
281	eor	r3,r3,r11			@ Maj(a,b,c)
282	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
283	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
284#if __ARM_ARCH__>=7
285	@ ldr	r2,[r1],#4			@ 3
286# if 3==15
287	str	r1,[sp,#17*4]			@ make room for r1
288# endif
289	eor	r0,r5,r5,ror#5
290	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
291	eor	r0,r0,r5,ror#19	@ Sigma1(e)
292	rev	r2,r2
293#else
294	@ ldrb	r2,[r1,#3]			@ 3
295	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
296	ldrb	r3,[r1,#2]
297	ldrb	r0,[r1,#1]
298	orr	r2,r2,r3,lsl#8
299	ldrb	r3,[r1],#4
300	orr	r2,r2,r0,lsl#16
301# if 3==15
302	str	r1,[sp,#17*4]			@ make room for r1
303# endif
304	eor	r0,r5,r5,ror#5
305	orr	r2,r2,r3,lsl#24
306	eor	r0,r0,r5,ror#19	@ Sigma1(e)
307#endif
308	ldr	r3,[r14],#4			@ *K256++
309	add	r8,r8,r2			@ h+=X[i]
310	str	r2,[sp,#3*4]
311	eor	r2,r6,r7
312	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
313	and	r2,r2,r5
314	add	r8,r8,r3			@ h+=K256[i]
315	eor	r2,r2,r7			@ Ch(e,f,g)
316	eor	r0,r9,r9,ror#11
317	add	r8,r8,r2			@ h+=Ch(e,f,g)
318#if 3==31
319	and	r3,r3,#0xff
320	cmp	r3,#0xf2			@ done?
321#endif
322#if 3<15
323# if __ARM_ARCH__>=7
324	ldr	r2,[r1],#4			@ prefetch
325# else
326	ldrb	r2,[r1,#3]
327# endif
328	eor	r3,r9,r10			@ a^b, b^c in next round
329#else
330	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
331	eor	r3,r9,r10			@ a^b, b^c in next round
332	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
333#endif
334	eor	r0,r0,r9,ror#20	@ Sigma0(a)
335	and	r12,r12,r3			@ (b^c)&=(a^b)
336	add	r4,r4,r8			@ d+=h
337	eor	r12,r12,r10			@ Maj(a,b,c)
338	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
339	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
340#if __ARM_ARCH__>=7
341	@ ldr	r2,[r1],#4			@ 4
342# if 4==15
343	str	r1,[sp,#17*4]			@ make room for r1
344# endif
345	eor	r0,r4,r4,ror#5
346	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
347	eor	r0,r0,r4,ror#19	@ Sigma1(e)
348	rev	r2,r2
349#else
350	@ ldrb	r2,[r1,#3]			@ 4
351	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
352	ldrb	r12,[r1,#2]
353	ldrb	r0,[r1,#1]
354	orr	r2,r2,r12,lsl#8
355	ldrb	r12,[r1],#4
356	orr	r2,r2,r0,lsl#16
357# if 4==15
358	str	r1,[sp,#17*4]			@ make room for r1
359# endif
360	eor	r0,r4,r4,ror#5
361	orr	r2,r2,r12,lsl#24
362	eor	r0,r0,r4,ror#19	@ Sigma1(e)
363#endif
364	ldr	r12,[r14],#4			@ *K256++
365	add	r7,r7,r2			@ h+=X[i]
366	str	r2,[sp,#4*4]
367	eor	r2,r5,r6
368	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
369	and	r2,r2,r4
370	add	r7,r7,r12			@ h+=K256[i]
371	eor	r2,r2,r6			@ Ch(e,f,g)
372	eor	r0,r8,r8,ror#11
373	add	r7,r7,r2			@ h+=Ch(e,f,g)
374#if 4==31
375	and	r12,r12,#0xff
376	cmp	r12,#0xf2			@ done?
377#endif
378#if 4<15
379# if __ARM_ARCH__>=7
380	ldr	r2,[r1],#4			@ prefetch
381# else
382	ldrb	r2,[r1,#3]
383# endif
384	eor	r12,r8,r9			@ a^b, b^c in next round
385#else
386	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
387	eor	r12,r8,r9			@ a^b, b^c in next round
388	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
389#endif
390	eor	r0,r0,r8,ror#20	@ Sigma0(a)
391	and	r3,r3,r12			@ (b^c)&=(a^b)
392	add	r11,r11,r7			@ d+=h
393	eor	r3,r3,r9			@ Maj(a,b,c)
394	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
395	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
396#if __ARM_ARCH__>=7
397	@ ldr	r2,[r1],#4			@ 5
398# if 5==15
399	str	r1,[sp,#17*4]			@ make room for r1
400# endif
401	eor	r0,r11,r11,ror#5
402	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
403	eor	r0,r0,r11,ror#19	@ Sigma1(e)
404	rev	r2,r2
405#else
406	@ ldrb	r2,[r1,#3]			@ 5
407	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
408	ldrb	r3,[r1,#2]
409	ldrb	r0,[r1,#1]
410	orr	r2,r2,r3,lsl#8
411	ldrb	r3,[r1],#4
412	orr	r2,r2,r0,lsl#16
413# if 5==15
414	str	r1,[sp,#17*4]			@ make room for r1
415# endif
416	eor	r0,r11,r11,ror#5
417	orr	r2,r2,r3,lsl#24
418	eor	r0,r0,r11,ror#19	@ Sigma1(e)
419#endif
420	ldr	r3,[r14],#4			@ *K256++
421	add	r6,r6,r2			@ h+=X[i]
422	str	r2,[sp,#5*4]
423	eor	r2,r4,r5
424	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
425	and	r2,r2,r11
426	add	r6,r6,r3			@ h+=K256[i]
427	eor	r2,r2,r5			@ Ch(e,f,g)
428	eor	r0,r7,r7,ror#11
429	add	r6,r6,r2			@ h+=Ch(e,f,g)
430#if 5==31
431	and	r3,r3,#0xff
432	cmp	r3,#0xf2			@ done?
433#endif
434#if 5<15
435# if __ARM_ARCH__>=7
436	ldr	r2,[r1],#4			@ prefetch
437# else
438	ldrb	r2,[r1,#3]
439# endif
440	eor	r3,r7,r8			@ a^b, b^c in next round
441#else
442	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
443	eor	r3,r7,r8			@ a^b, b^c in next round
444	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
445#endif
446	eor	r0,r0,r7,ror#20	@ Sigma0(a)
447	and	r12,r12,r3			@ (b^c)&=(a^b)
448	add	r10,r10,r6			@ d+=h
449	eor	r12,r12,r8			@ Maj(a,b,c)
450	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
451	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
452#if __ARM_ARCH__>=7
453	@ ldr	r2,[r1],#4			@ 6
454# if 6==15
455	str	r1,[sp,#17*4]			@ make room for r1
456# endif
457	eor	r0,r10,r10,ror#5
458	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
459	eor	r0,r0,r10,ror#19	@ Sigma1(e)
460	rev	r2,r2
461#else
462	@ ldrb	r2,[r1,#3]			@ 6
463	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
464	ldrb	r12,[r1,#2]
465	ldrb	r0,[r1,#1]
466	orr	r2,r2,r12,lsl#8
467	ldrb	r12,[r1],#4
468	orr	r2,r2,r0,lsl#16
469# if 6==15
470	str	r1,[sp,#17*4]			@ make room for r1
471# endif
472	eor	r0,r10,r10,ror#5
473	orr	r2,r2,r12,lsl#24
474	eor	r0,r0,r10,ror#19	@ Sigma1(e)
475#endif
476	ldr	r12,[r14],#4			@ *K256++
477	add	r5,r5,r2			@ h+=X[i]
478	str	r2,[sp,#6*4]
479	eor	r2,r11,r4
480	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
481	and	r2,r2,r10
482	add	r5,r5,r12			@ h+=K256[i]
483	eor	r2,r2,r4			@ Ch(e,f,g)
484	eor	r0,r6,r6,ror#11
485	add	r5,r5,r2			@ h+=Ch(e,f,g)
486#if 6==31
487	and	r12,r12,#0xff
488	cmp	r12,#0xf2			@ done?
489#endif
490#if 6<15
491# if __ARM_ARCH__>=7
492	ldr	r2,[r1],#4			@ prefetch
493# else
494	ldrb	r2,[r1,#3]
495# endif
496	eor	r12,r6,r7			@ a^b, b^c in next round
497#else
498	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
499	eor	r12,r6,r7			@ a^b, b^c in next round
500	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
501#endif
502	eor	r0,r0,r6,ror#20	@ Sigma0(a)
503	and	r3,r3,r12			@ (b^c)&=(a^b)
504	add	r9,r9,r5			@ d+=h
505	eor	r3,r3,r7			@ Maj(a,b,c)
506	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
507	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
508#if __ARM_ARCH__>=7
509	@ ldr	r2,[r1],#4			@ 7
510# if 7==15
511	str	r1,[sp,#17*4]			@ make room for r1
512# endif
513	eor	r0,r9,r9,ror#5
514	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
515	eor	r0,r0,r9,ror#19	@ Sigma1(e)
516	rev	r2,r2
517#else
518	@ ldrb	r2,[r1,#3]			@ 7
519	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
520	ldrb	r3,[r1,#2]
521	ldrb	r0,[r1,#1]
522	orr	r2,r2,r3,lsl#8
523	ldrb	r3,[r1],#4
524	orr	r2,r2,r0,lsl#16
525# if 7==15
526	str	r1,[sp,#17*4]			@ make room for r1
527# endif
528	eor	r0,r9,r9,ror#5
529	orr	r2,r2,r3,lsl#24
530	eor	r0,r0,r9,ror#19	@ Sigma1(e)
531#endif
532	ldr	r3,[r14],#4			@ *K256++
533	add	r4,r4,r2			@ h+=X[i]
534	str	r2,[sp,#7*4]
535	eor	r2,r10,r11
536	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
537	and	r2,r2,r9
538	add	r4,r4,r3			@ h+=K256[i]
539	eor	r2,r2,r11			@ Ch(e,f,g)
540	eor	r0,r5,r5,ror#11
541	add	r4,r4,r2			@ h+=Ch(e,f,g)
542#if 7==31
543	and	r3,r3,#0xff
544	cmp	r3,#0xf2			@ done?
545#endif
546#if 7<15
547# if __ARM_ARCH__>=7
548	ldr	r2,[r1],#4			@ prefetch
549# else
550	ldrb	r2,[r1,#3]
551# endif
552	eor	r3,r5,r6			@ a^b, b^c in next round
553#else
554	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
555	eor	r3,r5,r6			@ a^b, b^c in next round
556	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
557#endif
558	eor	r0,r0,r5,ror#20	@ Sigma0(a)
559	and	r12,r12,r3			@ (b^c)&=(a^b)
560	add	r8,r8,r4			@ d+=h
561	eor	r12,r12,r6			@ Maj(a,b,c)
562	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
563	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
564#if __ARM_ARCH__>=7
565	@ ldr	r2,[r1],#4			@ 8
566# if 8==15
567	str	r1,[sp,#17*4]			@ make room for r1
568# endif
569	eor	r0,r8,r8,ror#5
570	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
571	eor	r0,r0,r8,ror#19	@ Sigma1(e)
572	rev	r2,r2
573#else
574	@ ldrb	r2,[r1,#3]			@ 8
575	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
576	ldrb	r12,[r1,#2]
577	ldrb	r0,[r1,#1]
578	orr	r2,r2,r12,lsl#8
579	ldrb	r12,[r1],#4
580	orr	r2,r2,r0,lsl#16
581# if 8==15
582	str	r1,[sp,#17*4]			@ make room for r1
583# endif
584	eor	r0,r8,r8,ror#5
585	orr	r2,r2,r12,lsl#24
586	eor	r0,r0,r8,ror#19	@ Sigma1(e)
587#endif
588	ldr	r12,[r14],#4			@ *K256++
589	add	r11,r11,r2			@ h+=X[i]
590	str	r2,[sp,#8*4]
591	eor	r2,r9,r10
592	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
593	and	r2,r2,r8
594	add	r11,r11,r12			@ h+=K256[i]
595	eor	r2,r2,r10			@ Ch(e,f,g)
596	eor	r0,r4,r4,ror#11
597	add	r11,r11,r2			@ h+=Ch(e,f,g)
598#if 8==31
599	and	r12,r12,#0xff
600	cmp	r12,#0xf2			@ done?
601#endif
602#if 8<15
603# if __ARM_ARCH__>=7
604	ldr	r2,[r1],#4			@ prefetch
605# else
606	ldrb	r2,[r1,#3]
607# endif
608	eor	r12,r4,r5			@ a^b, b^c in next round
609#else
610	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
611	eor	r12,r4,r5			@ a^b, b^c in next round
612	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
613#endif
614	eor	r0,r0,r4,ror#20	@ Sigma0(a)
615	and	r3,r3,r12			@ (b^c)&=(a^b)
616	add	r7,r7,r11			@ d+=h
617	eor	r3,r3,r5			@ Maj(a,b,c)
618	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
619	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
620#if __ARM_ARCH__>=7
621	@ ldr	r2,[r1],#4			@ 9
622# if 9==15
623	str	r1,[sp,#17*4]			@ make room for r1
624# endif
625	eor	r0,r7,r7,ror#5
626	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
627	eor	r0,r0,r7,ror#19	@ Sigma1(e)
628	rev	r2,r2
629#else
630	@ ldrb	r2,[r1,#3]			@ 9
631	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
632	ldrb	r3,[r1,#2]
633	ldrb	r0,[r1,#1]
634	orr	r2,r2,r3,lsl#8
635	ldrb	r3,[r1],#4
636	orr	r2,r2,r0,lsl#16
637# if 9==15
638	str	r1,[sp,#17*4]			@ make room for r1
639# endif
640	eor	r0,r7,r7,ror#5
641	orr	r2,r2,r3,lsl#24
642	eor	r0,r0,r7,ror#19	@ Sigma1(e)
643#endif
644	ldr	r3,[r14],#4			@ *K256++
645	add	r10,r10,r2			@ h+=X[i]
646	str	r2,[sp,#9*4]
647	eor	r2,r8,r9
648	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
649	and	r2,r2,r7
650	add	r10,r10,r3			@ h+=K256[i]
651	eor	r2,r2,r9			@ Ch(e,f,g)
652	eor	r0,r11,r11,ror#11
653	add	r10,r10,r2			@ h+=Ch(e,f,g)
654#if 9==31
655	and	r3,r3,#0xff
656	cmp	r3,#0xf2			@ done?
657#endif
658#if 9<15
659# if __ARM_ARCH__>=7
660	ldr	r2,[r1],#4			@ prefetch
661# else
662	ldrb	r2,[r1,#3]
663# endif
664	eor	r3,r11,r4			@ a^b, b^c in next round
665#else
666	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
667	eor	r3,r11,r4			@ a^b, b^c in next round
668	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
669#endif
670	eor	r0,r0,r11,ror#20	@ Sigma0(a)
671	and	r12,r12,r3			@ (b^c)&=(a^b)
672	add	r6,r6,r10			@ d+=h
673	eor	r12,r12,r4			@ Maj(a,b,c)
674	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
675	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
676#if __ARM_ARCH__>=7
677	@ ldr	r2,[r1],#4			@ 10
678# if 10==15
679	str	r1,[sp,#17*4]			@ make room for r1
680# endif
681	eor	r0,r6,r6,ror#5
682	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
683	eor	r0,r0,r6,ror#19	@ Sigma1(e)
684	rev	r2,r2
685#else
686	@ ldrb	r2,[r1,#3]			@ 10
687	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
688	ldrb	r12,[r1,#2]
689	ldrb	r0,[r1,#1]
690	orr	r2,r2,r12,lsl#8
691	ldrb	r12,[r1],#4
692	orr	r2,r2,r0,lsl#16
693# if 10==15
694	str	r1,[sp,#17*4]			@ make room for r1
695# endif
696	eor	r0,r6,r6,ror#5
697	orr	r2,r2,r12,lsl#24
698	eor	r0,r0,r6,ror#19	@ Sigma1(e)
699#endif
700	ldr	r12,[r14],#4			@ *K256++
701	add	r9,r9,r2			@ h+=X[i]
702	str	r2,[sp,#10*4]
703	eor	r2,r7,r8
704	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
705	and	r2,r2,r6
706	add	r9,r9,r12			@ h+=K256[i]
707	eor	r2,r2,r8			@ Ch(e,f,g)
708	eor	r0,r10,r10,ror#11
709	add	r9,r9,r2			@ h+=Ch(e,f,g)
710#if 10==31
711	and	r12,r12,#0xff
712	cmp	r12,#0xf2			@ done?
713#endif
714#if 10<15
715# if __ARM_ARCH__>=7
716	ldr	r2,[r1],#4			@ prefetch
717# else
718	ldrb	r2,[r1,#3]
719# endif
720	eor	r12,r10,r11			@ a^b, b^c in next round
721#else
722	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
723	eor	r12,r10,r11			@ a^b, b^c in next round
724	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
725#endif
726	eor	r0,r0,r10,ror#20	@ Sigma0(a)
727	and	r3,r3,r12			@ (b^c)&=(a^b)
728	add	r5,r5,r9			@ d+=h
729	eor	r3,r3,r11			@ Maj(a,b,c)
730	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
731	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
732#if __ARM_ARCH__>=7
733	@ ldr	r2,[r1],#4			@ 11
734# if 11==15
735	str	r1,[sp,#17*4]			@ make room for r1
736# endif
737	eor	r0,r5,r5,ror#5
738	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
739	eor	r0,r0,r5,ror#19	@ Sigma1(e)
740	rev	r2,r2
741#else
742	@ ldrb	r2,[r1,#3]			@ 11
743	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
744	ldrb	r3,[r1,#2]
745	ldrb	r0,[r1,#1]
746	orr	r2,r2,r3,lsl#8
747	ldrb	r3,[r1],#4
748	orr	r2,r2,r0,lsl#16
749# if 11==15
750	str	r1,[sp,#17*4]			@ make room for r1
751# endif
752	eor	r0,r5,r5,ror#5
753	orr	r2,r2,r3,lsl#24
754	eor	r0,r0,r5,ror#19	@ Sigma1(e)
755#endif
756	ldr	r3,[r14],#4			@ *K256++
757	add	r8,r8,r2			@ h+=X[i]
758	str	r2,[sp,#11*4]
759	eor	r2,r6,r7
760	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
761	and	r2,r2,r5
762	add	r8,r8,r3			@ h+=K256[i]
763	eor	r2,r2,r7			@ Ch(e,f,g)
764	eor	r0,r9,r9,ror#11
765	add	r8,r8,r2			@ h+=Ch(e,f,g)
766#if 11==31
767	and	r3,r3,#0xff
768	cmp	r3,#0xf2			@ done?
769#endif
770#if 11<15
771# if __ARM_ARCH__>=7
772	ldr	r2,[r1],#4			@ prefetch
773# else
774	ldrb	r2,[r1,#3]
775# endif
776	eor	r3,r9,r10			@ a^b, b^c in next round
777#else
778	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
779	eor	r3,r9,r10			@ a^b, b^c in next round
780	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
781#endif
782	eor	r0,r0,r9,ror#20	@ Sigma0(a)
783	and	r12,r12,r3			@ (b^c)&=(a^b)
784	add	r4,r4,r8			@ d+=h
785	eor	r12,r12,r10			@ Maj(a,b,c)
786	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
787	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
788#if __ARM_ARCH__>=7
789	@ ldr	r2,[r1],#4			@ 12
790# if 12==15
791	str	r1,[sp,#17*4]			@ make room for r1
792# endif
793	eor	r0,r4,r4,ror#5
794	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
795	eor	r0,r0,r4,ror#19	@ Sigma1(e)
796	rev	r2,r2
797#else
798	@ ldrb	r2,[r1,#3]			@ 12
799	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
800	ldrb	r12,[r1,#2]
801	ldrb	r0,[r1,#1]
802	orr	r2,r2,r12,lsl#8
803	ldrb	r12,[r1],#4
804	orr	r2,r2,r0,lsl#16
805# if 12==15
806	str	r1,[sp,#17*4]			@ make room for r1
807# endif
808	eor	r0,r4,r4,ror#5
809	orr	r2,r2,r12,lsl#24
810	eor	r0,r0,r4,ror#19	@ Sigma1(e)
811#endif
812	ldr	r12,[r14],#4			@ *K256++
813	add	r7,r7,r2			@ h+=X[i]
814	str	r2,[sp,#12*4]
815	eor	r2,r5,r6
816	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
817	and	r2,r2,r4
818	add	r7,r7,r12			@ h+=K256[i]
819	eor	r2,r2,r6			@ Ch(e,f,g)
820	eor	r0,r8,r8,ror#11
821	add	r7,r7,r2			@ h+=Ch(e,f,g)
822#if 12==31
823	and	r12,r12,#0xff
824	cmp	r12,#0xf2			@ done?
825#endif
826#if 12<15
827# if __ARM_ARCH__>=7
828	ldr	r2,[r1],#4			@ prefetch
829# else
830	ldrb	r2,[r1,#3]
831# endif
832	eor	r12,r8,r9			@ a^b, b^c in next round
833#else
834	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
835	eor	r12,r8,r9			@ a^b, b^c in next round
836	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
837#endif
838	eor	r0,r0,r8,ror#20	@ Sigma0(a)
839	and	r3,r3,r12			@ (b^c)&=(a^b)
840	add	r11,r11,r7			@ d+=h
841	eor	r3,r3,r9			@ Maj(a,b,c)
842	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
843	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
844#if __ARM_ARCH__>=7
845	@ ldr	r2,[r1],#4			@ 13
846# if 13==15
847	str	r1,[sp,#17*4]			@ make room for r1
848# endif
849	eor	r0,r11,r11,ror#5
850	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
851	eor	r0,r0,r11,ror#19	@ Sigma1(e)
852	rev	r2,r2
853#else
854	@ ldrb	r2,[r1,#3]			@ 13
855	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
856	ldrb	r3,[r1,#2]
857	ldrb	r0,[r1,#1]
858	orr	r2,r2,r3,lsl#8
859	ldrb	r3,[r1],#4
860	orr	r2,r2,r0,lsl#16
861# if 13==15
862	str	r1,[sp,#17*4]			@ make room for r1
863# endif
864	eor	r0,r11,r11,ror#5
865	orr	r2,r2,r3,lsl#24
866	eor	r0,r0,r11,ror#19	@ Sigma1(e)
867#endif
868	ldr	r3,[r14],#4			@ *K256++
869	add	r6,r6,r2			@ h+=X[i]
870	str	r2,[sp,#13*4]
871	eor	r2,r4,r5
872	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
873	and	r2,r2,r11
874	add	r6,r6,r3			@ h+=K256[i]
875	eor	r2,r2,r5			@ Ch(e,f,g)
876	eor	r0,r7,r7,ror#11
877	add	r6,r6,r2			@ h+=Ch(e,f,g)
878#if 13==31
879	and	r3,r3,#0xff
880	cmp	r3,#0xf2			@ done?
881#endif
882#if 13<15
883# if __ARM_ARCH__>=7
884	ldr	r2,[r1],#4			@ prefetch
885# else
886	ldrb	r2,[r1,#3]
887# endif
888	eor	r3,r7,r8			@ a^b, b^c in next round
889#else
890	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
891	eor	r3,r7,r8			@ a^b, b^c in next round
892	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
893#endif
894	eor	r0,r0,r7,ror#20	@ Sigma0(a)
895	and	r12,r12,r3			@ (b^c)&=(a^b)
896	add	r10,r10,r6			@ d+=h
897	eor	r12,r12,r8			@ Maj(a,b,c)
898	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
899	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
900#if __ARM_ARCH__>=7
901	@ ldr	r2,[r1],#4			@ 14
902# if 14==15
903	str	r1,[sp,#17*4]			@ make room for r1
904# endif
905	eor	r0,r10,r10,ror#5
906	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
907	eor	r0,r0,r10,ror#19	@ Sigma1(e)
908	rev	r2,r2
909#else
910	@ ldrb	r2,[r1,#3]			@ 14
911	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
912	ldrb	r12,[r1,#2]
913	ldrb	r0,[r1,#1]
914	orr	r2,r2,r12,lsl#8
915	ldrb	r12,[r1],#4
916	orr	r2,r2,r0,lsl#16
917# if 14==15
918	str	r1,[sp,#17*4]			@ make room for r1
919# endif
920	eor	r0,r10,r10,ror#5
921	orr	r2,r2,r12,lsl#24
922	eor	r0,r0,r10,ror#19	@ Sigma1(e)
923#endif
924	ldr	r12,[r14],#4			@ *K256++
925	add	r5,r5,r2			@ h+=X[i]
926	str	r2,[sp,#14*4]
927	eor	r2,r11,r4
928	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
929	and	r2,r2,r10
930	add	r5,r5,r12			@ h+=K256[i]
931	eor	r2,r2,r4			@ Ch(e,f,g)
932	eor	r0,r6,r6,ror#11
933	add	r5,r5,r2			@ h+=Ch(e,f,g)
934#if 14==31
935	and	r12,r12,#0xff
936	cmp	r12,#0xf2			@ done?
937#endif
938#if 14<15
939# if __ARM_ARCH__>=7
940	ldr	r2,[r1],#4			@ prefetch
941# else
942	ldrb	r2,[r1,#3]
943# endif
944	eor	r12,r6,r7			@ a^b, b^c in next round
945#else
946	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
947	eor	r12,r6,r7			@ a^b, b^c in next round
948	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
949#endif
950	eor	r0,r0,r6,ror#20	@ Sigma0(a)
951	and	r3,r3,r12			@ (b^c)&=(a^b)
952	add	r9,r9,r5			@ d+=h
953	eor	r3,r3,r7			@ Maj(a,b,c)
954	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
955	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
956#if __ARM_ARCH__>=7
957	@ ldr	r2,[r1],#4			@ 15
958# if 15==15
959	str	r1,[sp,#17*4]			@ make room for r1
960# endif
961	eor	r0,r9,r9,ror#5
962	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
963	eor	r0,r0,r9,ror#19	@ Sigma1(e)
964	rev	r2,r2
965#else
966	@ ldrb	r2,[r1,#3]			@ 15
967	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
968	ldrb	r3,[r1,#2]
969	ldrb	r0,[r1,#1]
970	orr	r2,r2,r3,lsl#8
971	ldrb	r3,[r1],#4
972	orr	r2,r2,r0,lsl#16
973# if 15==15
974	str	r1,[sp,#17*4]			@ make room for r1
975# endif
976	eor	r0,r9,r9,ror#5
977	orr	r2,r2,r3,lsl#24
978	eor	r0,r0,r9,ror#19	@ Sigma1(e)
979#endif
980	ldr	r3,[r14],#4			@ *K256++
981	add	r4,r4,r2			@ h+=X[i]
982	str	r2,[sp,#15*4]
983	eor	r2,r10,r11
984	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
985	and	r2,r2,r9
986	add	r4,r4,r3			@ h+=K256[i]
987	eor	r2,r2,r11			@ Ch(e,f,g)
988	eor	r0,r5,r5,ror#11
989	add	r4,r4,r2			@ h+=Ch(e,f,g)
990#if 15==31
991	and	r3,r3,#0xff
992	cmp	r3,#0xf2			@ done?
993#endif
994#if 15<15
995# if __ARM_ARCH__>=7
996	ldr	r2,[r1],#4			@ prefetch
997# else
998	ldrb	r2,[r1,#3]
999# endif
1000	eor	r3,r5,r6			@ a^b, b^c in next round
1001#else
1002	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1003	eor	r3,r5,r6			@ a^b, b^c in next round
1004	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1005#endif
1006	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1007	and	r12,r12,r3			@ (b^c)&=(a^b)
1008	add	r8,r8,r4			@ d+=h
1009	eor	r12,r12,r6			@ Maj(a,b,c)
1010	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1011	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1012.Lrounds_16_xx:
1013	@ ldr	r2,[sp,#1*4]		@ 16
1014	@ ldr	r1,[sp,#14*4]
1015	mov	r0,r2,ror#7
1016	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1017	mov	r12,r1,ror#17
1018	eor	r0,r0,r2,ror#18
1019	eor	r12,r12,r1,ror#19
1020	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1021	ldr	r2,[sp,#0*4]
1022	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1023	ldr	r1,[sp,#9*4]
1024
1025	add	r12,r12,r0
1026	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1027	add	r2,r2,r12
1028	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1029	add	r2,r2,r1			@ X[i]
1030	ldr	r12,[r14],#4			@ *K256++
1031	add	r11,r11,r2			@ h+=X[i]
1032	str	r2,[sp,#0*4]
1033	eor	r2,r9,r10
1034	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1035	and	r2,r2,r8
1036	add	r11,r11,r12			@ h+=K256[i]
1037	eor	r2,r2,r10			@ Ch(e,f,g)
1038	eor	r0,r4,r4,ror#11
1039	add	r11,r11,r2			@ h+=Ch(e,f,g)
1040#if 16==31
1041	and	r12,r12,#0xff
1042	cmp	r12,#0xf2			@ done?
1043#endif
1044#if 16<15
1045# if __ARM_ARCH__>=7
1046	ldr	r2,[r1],#4			@ prefetch
1047# else
1048	ldrb	r2,[r1,#3]
1049# endif
1050	eor	r12,r4,r5			@ a^b, b^c in next round
1051#else
1052	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1053	eor	r12,r4,r5			@ a^b, b^c in next round
1054	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1055#endif
1056	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1057	and	r3,r3,r12			@ (b^c)&=(a^b)
1058	add	r7,r7,r11			@ d+=h
1059	eor	r3,r3,r5			@ Maj(a,b,c)
1060	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1061	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1062	@ ldr	r2,[sp,#2*4]		@ 17
1063	@ ldr	r1,[sp,#15*4]
1064	mov	r0,r2,ror#7
1065	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1066	mov	r3,r1,ror#17
1067	eor	r0,r0,r2,ror#18
1068	eor	r3,r3,r1,ror#19
1069	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1070	ldr	r2,[sp,#1*4]
1071	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1072	ldr	r1,[sp,#10*4]
1073
1074	add	r3,r3,r0
1075	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1076	add	r2,r2,r3
1077	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1078	add	r2,r2,r1			@ X[i]
1079	ldr	r3,[r14],#4			@ *K256++
1080	add	r10,r10,r2			@ h+=X[i]
1081	str	r2,[sp,#1*4]
1082	eor	r2,r8,r9
1083	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1084	and	r2,r2,r7
1085	add	r10,r10,r3			@ h+=K256[i]
1086	eor	r2,r2,r9			@ Ch(e,f,g)
1087	eor	r0,r11,r11,ror#11
1088	add	r10,r10,r2			@ h+=Ch(e,f,g)
1089#if 17==31
1090	and	r3,r3,#0xff
1091	cmp	r3,#0xf2			@ done?
1092#endif
1093#if 17<15
1094# if __ARM_ARCH__>=7
1095	ldr	r2,[r1],#4			@ prefetch
1096# else
1097	ldrb	r2,[r1,#3]
1098# endif
1099	eor	r3,r11,r4			@ a^b, b^c in next round
1100#else
1101	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1102	eor	r3,r11,r4			@ a^b, b^c in next round
1103	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1104#endif
1105	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1106	and	r12,r12,r3			@ (b^c)&=(a^b)
1107	add	r6,r6,r10			@ d+=h
1108	eor	r12,r12,r4			@ Maj(a,b,c)
1109	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1110	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1111	@ ldr	r2,[sp,#3*4]		@ 18
1112	@ ldr	r1,[sp,#0*4]
1113	mov	r0,r2,ror#7
1114	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1115	mov	r12,r1,ror#17
1116	eor	r0,r0,r2,ror#18
1117	eor	r12,r12,r1,ror#19
1118	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1119	ldr	r2,[sp,#2*4]
1120	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1121	ldr	r1,[sp,#11*4]
1122
1123	add	r12,r12,r0
1124	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1125	add	r2,r2,r12
1126	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1127	add	r2,r2,r1			@ X[i]
1128	ldr	r12,[r14],#4			@ *K256++
1129	add	r9,r9,r2			@ h+=X[i]
1130	str	r2,[sp,#2*4]
1131	eor	r2,r7,r8
1132	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1133	and	r2,r2,r6
1134	add	r9,r9,r12			@ h+=K256[i]
1135	eor	r2,r2,r8			@ Ch(e,f,g)
1136	eor	r0,r10,r10,ror#11
1137	add	r9,r9,r2			@ h+=Ch(e,f,g)
1138#if 18==31
1139	and	r12,r12,#0xff
1140	cmp	r12,#0xf2			@ done?
1141#endif
1142#if 18<15
1143# if __ARM_ARCH__>=7
1144	ldr	r2,[r1],#4			@ prefetch
1145# else
1146	ldrb	r2,[r1,#3]
1147# endif
1148	eor	r12,r10,r11			@ a^b, b^c in next round
1149#else
1150	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1151	eor	r12,r10,r11			@ a^b, b^c in next round
1152	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1153#endif
1154	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1155	and	r3,r3,r12			@ (b^c)&=(a^b)
1156	add	r5,r5,r9			@ d+=h
1157	eor	r3,r3,r11			@ Maj(a,b,c)
1158	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1159	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1160	@ ldr	r2,[sp,#4*4]		@ 19
1161	@ ldr	r1,[sp,#1*4]
1162	mov	r0,r2,ror#7
1163	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1164	mov	r3,r1,ror#17
1165	eor	r0,r0,r2,ror#18
1166	eor	r3,r3,r1,ror#19
1167	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1168	ldr	r2,[sp,#3*4]
1169	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1170	ldr	r1,[sp,#12*4]
1171
1172	add	r3,r3,r0
1173	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1174	add	r2,r2,r3
1175	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1176	add	r2,r2,r1			@ X[i]
1177	ldr	r3,[r14],#4			@ *K256++
1178	add	r8,r8,r2			@ h+=X[i]
1179	str	r2,[sp,#3*4]
1180	eor	r2,r6,r7
1181	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1182	and	r2,r2,r5
1183	add	r8,r8,r3			@ h+=K256[i]
1184	eor	r2,r2,r7			@ Ch(e,f,g)
1185	eor	r0,r9,r9,ror#11
1186	add	r8,r8,r2			@ h+=Ch(e,f,g)
1187#if 19==31
1188	and	r3,r3,#0xff
1189	cmp	r3,#0xf2			@ done?
1190#endif
1191#if 19<15
1192# if __ARM_ARCH__>=7
1193	ldr	r2,[r1],#4			@ prefetch
1194# else
1195	ldrb	r2,[r1,#3]
1196# endif
1197	eor	r3,r9,r10			@ a^b, b^c in next round
1198#else
1199	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1200	eor	r3,r9,r10			@ a^b, b^c in next round
1201	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1202#endif
1203	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1204	and	r12,r12,r3			@ (b^c)&=(a^b)
1205	add	r4,r4,r8			@ d+=h
1206	eor	r12,r12,r10			@ Maj(a,b,c)
1207	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1208	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1209	@ ldr	r2,[sp,#5*4]		@ 20
1210	@ ldr	r1,[sp,#2*4]
1211	mov	r0,r2,ror#7
1212	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1213	mov	r12,r1,ror#17
1214	eor	r0,r0,r2,ror#18
1215	eor	r12,r12,r1,ror#19
1216	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1217	ldr	r2,[sp,#4*4]
1218	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1219	ldr	r1,[sp,#13*4]
1220
1221	add	r12,r12,r0
1222	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1223	add	r2,r2,r12
1224	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1225	add	r2,r2,r1			@ X[i]
1226	ldr	r12,[r14],#4			@ *K256++
1227	add	r7,r7,r2			@ h+=X[i]
1228	str	r2,[sp,#4*4]
1229	eor	r2,r5,r6
1230	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1231	and	r2,r2,r4
1232	add	r7,r7,r12			@ h+=K256[i]
1233	eor	r2,r2,r6			@ Ch(e,f,g)
1234	eor	r0,r8,r8,ror#11
1235	add	r7,r7,r2			@ h+=Ch(e,f,g)
1236#if 20==31
1237	and	r12,r12,#0xff
1238	cmp	r12,#0xf2			@ done?
1239#endif
1240#if 20<15
1241# if __ARM_ARCH__>=7
1242	ldr	r2,[r1],#4			@ prefetch
1243# else
1244	ldrb	r2,[r1,#3]
1245# endif
1246	eor	r12,r8,r9			@ a^b, b^c in next round
1247#else
1248	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1249	eor	r12,r8,r9			@ a^b, b^c in next round
1250	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1251#endif
1252	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1253	and	r3,r3,r12			@ (b^c)&=(a^b)
1254	add	r11,r11,r7			@ d+=h
1255	eor	r3,r3,r9			@ Maj(a,b,c)
1256	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1257	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1258	@ ldr	r2,[sp,#6*4]		@ 21
1259	@ ldr	r1,[sp,#3*4]
1260	mov	r0,r2,ror#7
1261	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1262	mov	r3,r1,ror#17
1263	eor	r0,r0,r2,ror#18
1264	eor	r3,r3,r1,ror#19
1265	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1266	ldr	r2,[sp,#5*4]
1267	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1268	ldr	r1,[sp,#14*4]
1269
1270	add	r3,r3,r0
1271	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1272	add	r2,r2,r3
1273	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1274	add	r2,r2,r1			@ X[i]
1275	ldr	r3,[r14],#4			@ *K256++
1276	add	r6,r6,r2			@ h+=X[i]
1277	str	r2,[sp,#5*4]
1278	eor	r2,r4,r5
1279	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1280	and	r2,r2,r11
1281	add	r6,r6,r3			@ h+=K256[i]
1282	eor	r2,r2,r5			@ Ch(e,f,g)
1283	eor	r0,r7,r7,ror#11
1284	add	r6,r6,r2			@ h+=Ch(e,f,g)
1285#if 21==31
1286	and	r3,r3,#0xff
1287	cmp	r3,#0xf2			@ done?
1288#endif
1289#if 21<15
1290# if __ARM_ARCH__>=7
1291	ldr	r2,[r1],#4			@ prefetch
1292# else
1293	ldrb	r2,[r1,#3]
1294# endif
1295	eor	r3,r7,r8			@ a^b, b^c in next round
1296#else
1297	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1298	eor	r3,r7,r8			@ a^b, b^c in next round
1299	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1300#endif
1301	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1302	and	r12,r12,r3			@ (b^c)&=(a^b)
1303	add	r10,r10,r6			@ d+=h
1304	eor	r12,r12,r8			@ Maj(a,b,c)
1305	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1306	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1307	@ ldr	r2,[sp,#7*4]		@ 22
1308	@ ldr	r1,[sp,#4*4]
1309	mov	r0,r2,ror#7
1310	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1311	mov	r12,r1,ror#17
1312	eor	r0,r0,r2,ror#18
1313	eor	r12,r12,r1,ror#19
1314	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1315	ldr	r2,[sp,#6*4]
1316	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1317	ldr	r1,[sp,#15*4]
1318
1319	add	r12,r12,r0
1320	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1321	add	r2,r2,r12
1322	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1323	add	r2,r2,r1			@ X[i]
1324	ldr	r12,[r14],#4			@ *K256++
1325	add	r5,r5,r2			@ h+=X[i]
1326	str	r2,[sp,#6*4]
1327	eor	r2,r11,r4
1328	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1329	and	r2,r2,r10
1330	add	r5,r5,r12			@ h+=K256[i]
1331	eor	r2,r2,r4			@ Ch(e,f,g)
1332	eor	r0,r6,r6,ror#11
1333	add	r5,r5,r2			@ h+=Ch(e,f,g)
1334#if 22==31
1335	and	r12,r12,#0xff
1336	cmp	r12,#0xf2			@ done?
1337#endif
1338#if 22<15
1339# if __ARM_ARCH__>=7
1340	ldr	r2,[r1],#4			@ prefetch
1341# else
1342	ldrb	r2,[r1,#3]
1343# endif
1344	eor	r12,r6,r7			@ a^b, b^c in next round
1345#else
1346	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1347	eor	r12,r6,r7			@ a^b, b^c in next round
1348	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1349#endif
1350	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1351	and	r3,r3,r12			@ (b^c)&=(a^b)
1352	add	r9,r9,r5			@ d+=h
1353	eor	r3,r3,r7			@ Maj(a,b,c)
1354	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1355	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1356	@ ldr	r2,[sp,#8*4]		@ 23
1357	@ ldr	r1,[sp,#5*4]
1358	mov	r0,r2,ror#7
1359	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1360	mov	r3,r1,ror#17
1361	eor	r0,r0,r2,ror#18
1362	eor	r3,r3,r1,ror#19
1363	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1364	ldr	r2,[sp,#7*4]
1365	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1366	ldr	r1,[sp,#0*4]
1367
1368	add	r3,r3,r0
1369	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1370	add	r2,r2,r3
1371	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1372	add	r2,r2,r1			@ X[i]
1373	ldr	r3,[r14],#4			@ *K256++
1374	add	r4,r4,r2			@ h+=X[i]
1375	str	r2,[sp,#7*4]
1376	eor	r2,r10,r11
1377	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1378	and	r2,r2,r9
1379	add	r4,r4,r3			@ h+=K256[i]
1380	eor	r2,r2,r11			@ Ch(e,f,g)
1381	eor	r0,r5,r5,ror#11
1382	add	r4,r4,r2			@ h+=Ch(e,f,g)
1383#if 23==31
1384	and	r3,r3,#0xff
1385	cmp	r3,#0xf2			@ done?
1386#endif
1387#if 23<15
1388# if __ARM_ARCH__>=7
1389	ldr	r2,[r1],#4			@ prefetch
1390# else
1391	ldrb	r2,[r1,#3]
1392# endif
1393	eor	r3,r5,r6			@ a^b, b^c in next round
1394#else
1395	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1396	eor	r3,r5,r6			@ a^b, b^c in next round
1397	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1398#endif
1399	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1400	and	r12,r12,r3			@ (b^c)&=(a^b)
1401	add	r8,r8,r4			@ d+=h
1402	eor	r12,r12,r6			@ Maj(a,b,c)
1403	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1404	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1405	@ ldr	r2,[sp,#9*4]		@ 24
1406	@ ldr	r1,[sp,#6*4]
1407	mov	r0,r2,ror#7
1408	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1409	mov	r12,r1,ror#17
1410	eor	r0,r0,r2,ror#18
1411	eor	r12,r12,r1,ror#19
1412	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1413	ldr	r2,[sp,#8*4]
1414	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1415	ldr	r1,[sp,#1*4]
1416
1417	add	r12,r12,r0
1418	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1419	add	r2,r2,r12
1420	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1421	add	r2,r2,r1			@ X[i]
1422	ldr	r12,[r14],#4			@ *K256++
1423	add	r11,r11,r2			@ h+=X[i]
1424	str	r2,[sp,#8*4]
1425	eor	r2,r9,r10
1426	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1427	and	r2,r2,r8
1428	add	r11,r11,r12			@ h+=K256[i]
1429	eor	r2,r2,r10			@ Ch(e,f,g)
1430	eor	r0,r4,r4,ror#11
1431	add	r11,r11,r2			@ h+=Ch(e,f,g)
1432#if 24==31
1433	and	r12,r12,#0xff
1434	cmp	r12,#0xf2			@ done?
1435#endif
1436#if 24<15
1437# if __ARM_ARCH__>=7
1438	ldr	r2,[r1],#4			@ prefetch
1439# else
1440	ldrb	r2,[r1,#3]
1441# endif
1442	eor	r12,r4,r5			@ a^b, b^c in next round
1443#else
1444	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1445	eor	r12,r4,r5			@ a^b, b^c in next round
1446	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1447#endif
1448	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1449	and	r3,r3,r12			@ (b^c)&=(a^b)
1450	add	r7,r7,r11			@ d+=h
1451	eor	r3,r3,r5			@ Maj(a,b,c)
1452	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1453	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1454	@ ldr	r2,[sp,#10*4]		@ 25
1455	@ ldr	r1,[sp,#7*4]
1456	mov	r0,r2,ror#7
1457	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1458	mov	r3,r1,ror#17
1459	eor	r0,r0,r2,ror#18
1460	eor	r3,r3,r1,ror#19
1461	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1462	ldr	r2,[sp,#9*4]
1463	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1464	ldr	r1,[sp,#2*4]
1465
1466	add	r3,r3,r0
1467	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1468	add	r2,r2,r3
1469	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1470	add	r2,r2,r1			@ X[i]
1471	ldr	r3,[r14],#4			@ *K256++
1472	add	r10,r10,r2			@ h+=X[i]
1473	str	r2,[sp,#9*4]
1474	eor	r2,r8,r9
1475	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1476	and	r2,r2,r7
1477	add	r10,r10,r3			@ h+=K256[i]
1478	eor	r2,r2,r9			@ Ch(e,f,g)
1479	eor	r0,r11,r11,ror#11
1480	add	r10,r10,r2			@ h+=Ch(e,f,g)
1481#if 25==31
1482	and	r3,r3,#0xff
1483	cmp	r3,#0xf2			@ done?
1484#endif
1485#if 25<15
1486# if __ARM_ARCH__>=7
1487	ldr	r2,[r1],#4			@ prefetch
1488# else
1489	ldrb	r2,[r1,#3]
1490# endif
1491	eor	r3,r11,r4			@ a^b, b^c in next round
1492#else
1493	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1494	eor	r3,r11,r4			@ a^b, b^c in next round
1495	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1496#endif
1497	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1498	and	r12,r12,r3			@ (b^c)&=(a^b)
1499	add	r6,r6,r10			@ d+=h
1500	eor	r12,r12,r4			@ Maj(a,b,c)
1501	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1502	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1503	@ ldr	r2,[sp,#11*4]		@ 26
1504	@ ldr	r1,[sp,#8*4]
1505	mov	r0,r2,ror#7
1506	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1507	mov	r12,r1,ror#17
1508	eor	r0,r0,r2,ror#18
1509	eor	r12,r12,r1,ror#19
1510	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1511	ldr	r2,[sp,#10*4]
1512	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1513	ldr	r1,[sp,#3*4]
1514
1515	add	r12,r12,r0
1516	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1517	add	r2,r2,r12
1518	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1519	add	r2,r2,r1			@ X[i]
1520	ldr	r12,[r14],#4			@ *K256++
1521	add	r9,r9,r2			@ h+=X[i]
1522	str	r2,[sp,#10*4]
1523	eor	r2,r7,r8
1524	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1525	and	r2,r2,r6
1526	add	r9,r9,r12			@ h+=K256[i]
1527	eor	r2,r2,r8			@ Ch(e,f,g)
1528	eor	r0,r10,r10,ror#11
1529	add	r9,r9,r2			@ h+=Ch(e,f,g)
1530#if 26==31
1531	and	r12,r12,#0xff
1532	cmp	r12,#0xf2			@ done?
1533#endif
1534#if 26<15
1535# if __ARM_ARCH__>=7
1536	ldr	r2,[r1],#4			@ prefetch
1537# else
1538	ldrb	r2,[r1,#3]
1539# endif
1540	eor	r12,r10,r11			@ a^b, b^c in next round
1541#else
1542	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1543	eor	r12,r10,r11			@ a^b, b^c in next round
1544	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1545#endif
1546	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1547	and	r3,r3,r12			@ (b^c)&=(a^b)
1548	add	r5,r5,r9			@ d+=h
1549	eor	r3,r3,r11			@ Maj(a,b,c)
1550	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1551	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1552	@ ldr	r2,[sp,#12*4]		@ 27
1553	@ ldr	r1,[sp,#9*4]
1554	mov	r0,r2,ror#7
1555	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1556	mov	r3,r1,ror#17
1557	eor	r0,r0,r2,ror#18
1558	eor	r3,r3,r1,ror#19
1559	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1560	ldr	r2,[sp,#11*4]
1561	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1562	ldr	r1,[sp,#4*4]
1563
1564	add	r3,r3,r0
1565	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1566	add	r2,r2,r3
1567	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1568	add	r2,r2,r1			@ X[i]
1569	ldr	r3,[r14],#4			@ *K256++
1570	add	r8,r8,r2			@ h+=X[i]
1571	str	r2,[sp,#11*4]
1572	eor	r2,r6,r7
1573	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1574	and	r2,r2,r5
1575	add	r8,r8,r3			@ h+=K256[i]
1576	eor	r2,r2,r7			@ Ch(e,f,g)
1577	eor	r0,r9,r9,ror#11
1578	add	r8,r8,r2			@ h+=Ch(e,f,g)
1579#if 27==31
1580	and	r3,r3,#0xff
1581	cmp	r3,#0xf2			@ done?
1582#endif
1583#if 27<15
1584# if __ARM_ARCH__>=7
1585	ldr	r2,[r1],#4			@ prefetch
1586# else
1587	ldrb	r2,[r1,#3]
1588# endif
1589	eor	r3,r9,r10			@ a^b, b^c in next round
1590#else
1591	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1592	eor	r3,r9,r10			@ a^b, b^c in next round
1593	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1594#endif
1595	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1596	and	r12,r12,r3			@ (b^c)&=(a^b)
1597	add	r4,r4,r8			@ d+=h
1598	eor	r12,r12,r10			@ Maj(a,b,c)
1599	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1600	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1601	@ ldr	r2,[sp,#13*4]		@ 28
1602	@ ldr	r1,[sp,#10*4]
1603	mov	r0,r2,ror#7
1604	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1605	mov	r12,r1,ror#17
1606	eor	r0,r0,r2,ror#18
1607	eor	r12,r12,r1,ror#19
1608	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1609	ldr	r2,[sp,#12*4]
1610	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1611	ldr	r1,[sp,#5*4]
1612
1613	add	r12,r12,r0
1614	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1615	add	r2,r2,r12
1616	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1617	add	r2,r2,r1			@ X[i]
1618	ldr	r12,[r14],#4			@ *K256++
1619	add	r7,r7,r2			@ h+=X[i]
1620	str	r2,[sp,#12*4]
1621	eor	r2,r5,r6
1622	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1623	and	r2,r2,r4
1624	add	r7,r7,r12			@ h+=K256[i]
1625	eor	r2,r2,r6			@ Ch(e,f,g)
1626	eor	r0,r8,r8,ror#11
1627	add	r7,r7,r2			@ h+=Ch(e,f,g)
1628#if 28==31
1629	and	r12,r12,#0xff
1630	cmp	r12,#0xf2			@ done?
1631#endif
1632#if 28<15
1633# if __ARM_ARCH__>=7
1634	ldr	r2,[r1],#4			@ prefetch
1635# else
1636	ldrb	r2,[r1,#3]
1637# endif
1638	eor	r12,r8,r9			@ a^b, b^c in next round
1639#else
1640	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1641	eor	r12,r8,r9			@ a^b, b^c in next round
1642	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1643#endif
1644	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1645	and	r3,r3,r12			@ (b^c)&=(a^b)
1646	add	r11,r11,r7			@ d+=h
1647	eor	r3,r3,r9			@ Maj(a,b,c)
1648	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1649	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1650	@ ldr	r2,[sp,#14*4]		@ 29
1651	@ ldr	r1,[sp,#11*4]
1652	mov	r0,r2,ror#7
1653	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1654	mov	r3,r1,ror#17
1655	eor	r0,r0,r2,ror#18
1656	eor	r3,r3,r1,ror#19
1657	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1658	ldr	r2,[sp,#13*4]
1659	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1660	ldr	r1,[sp,#6*4]
1661
1662	add	r3,r3,r0
1663	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1664	add	r2,r2,r3
1665	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1666	add	r2,r2,r1			@ X[i]
1667	ldr	r3,[r14],#4			@ *K256++
1668	add	r6,r6,r2			@ h+=X[i]
1669	str	r2,[sp,#13*4]
1670	eor	r2,r4,r5
1671	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1672	and	r2,r2,r11
1673	add	r6,r6,r3			@ h+=K256[i]
1674	eor	r2,r2,r5			@ Ch(e,f,g)
1675	eor	r0,r7,r7,ror#11
1676	add	r6,r6,r2			@ h+=Ch(e,f,g)
1677#if 29==31
1678	and	r3,r3,#0xff
1679	cmp	r3,#0xf2			@ done?
1680#endif
1681#if 29<15
1682# if __ARM_ARCH__>=7
1683	ldr	r2,[r1],#4			@ prefetch
1684# else
1685	ldrb	r2,[r1,#3]
1686# endif
1687	eor	r3,r7,r8			@ a^b, b^c in next round
1688#else
1689	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1690	eor	r3,r7,r8			@ a^b, b^c in next round
1691	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1692#endif
1693	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1694	and	r12,r12,r3			@ (b^c)&=(a^b)
1695	add	r10,r10,r6			@ d+=h
1696	eor	r12,r12,r8			@ Maj(a,b,c)
1697	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1698	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1699	@ ldr	r2,[sp,#15*4]		@ 30
1700	@ ldr	r1,[sp,#12*4]
1701	mov	r0,r2,ror#7
1702	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1703	mov	r12,r1,ror#17
1704	eor	r0,r0,r2,ror#18
1705	eor	r12,r12,r1,ror#19
1706	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1707	ldr	r2,[sp,#14*4]
1708	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1709	ldr	r1,[sp,#7*4]
1710
1711	add	r12,r12,r0
1712	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1713	add	r2,r2,r12
1714	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1715	add	r2,r2,r1			@ X[i]
1716	ldr	r12,[r14],#4			@ *K256++
1717	add	r5,r5,r2			@ h+=X[i]
1718	str	r2,[sp,#14*4]
1719	eor	r2,r11,r4
1720	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1721	and	r2,r2,r10
1722	add	r5,r5,r12			@ h+=K256[i]
1723	eor	r2,r2,r4			@ Ch(e,f,g)
1724	eor	r0,r6,r6,ror#11
1725	add	r5,r5,r2			@ h+=Ch(e,f,g)
1726#if 30==31
1727	and	r12,r12,#0xff
1728	cmp	r12,#0xf2			@ done?
1729#endif
1730#if 30<15
1731# if __ARM_ARCH__>=7
1732	ldr	r2,[r1],#4			@ prefetch
1733# else
1734	ldrb	r2,[r1,#3]
1735# endif
1736	eor	r12,r6,r7			@ a^b, b^c in next round
1737#else
1738	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1739	eor	r12,r6,r7			@ a^b, b^c in next round
1740	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1741#endif
1742	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1743	and	r3,r3,r12			@ (b^c)&=(a^b)
1744	add	r9,r9,r5			@ d+=h
1745	eor	r3,r3,r7			@ Maj(a,b,c)
1746	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1747	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1748	@ ldr	r2,[sp,#0*4]		@ 31
1749	@ ldr	r1,[sp,#13*4]
1750	mov	r0,r2,ror#7
1751	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1752	mov	r3,r1,ror#17
1753	eor	r0,r0,r2,ror#18
1754	eor	r3,r3,r1,ror#19
1755	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1756	ldr	r2,[sp,#15*4]
1757	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1758	ldr	r1,[sp,#8*4]
1759
1760	add	r3,r3,r0
1761	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1762	add	r2,r2,r3
1763	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1764	add	r2,r2,r1			@ X[i]
1765	ldr	r3,[r14],#4			@ *K256++
1766	add	r4,r4,r2			@ h+=X[i]
1767	str	r2,[sp,#15*4]
1768	eor	r2,r10,r11
1769	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1770	and	r2,r2,r9
1771	add	r4,r4,r3			@ h+=K256[i]
1772	eor	r2,r2,r11			@ Ch(e,f,g)
1773	eor	r0,r5,r5,ror#11
1774	add	r4,r4,r2			@ h+=Ch(e,f,g)
1775#if 31==31
1776	and	r3,r3,#0xff
1777	cmp	r3,#0xf2			@ done?
1778#endif
1779#if 31<15
1780# if __ARM_ARCH__>=7
1781	ldr	r2,[r1],#4			@ prefetch
1782# else
1783	ldrb	r2,[r1,#3]
1784# endif
1785	eor	r3,r5,r6			@ a^b, b^c in next round
1786#else
1787	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1788	eor	r3,r5,r6			@ a^b, b^c in next round
1789	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1790#endif
1791	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1792	and	r12,r12,r3			@ (b^c)&=(a^b)
1793	add	r8,r8,r4			@ d+=h
1794	eor	r12,r12,r6			@ Maj(a,b,c)
1795	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1796	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1797#if __ARM_ARCH__>=7
1798	ite	eq			@ Thumb2 thing, sanity check in ARM
1799#endif
1800	ldreq	r3,[sp,#16*4]		@ pull ctx
1801	bne	.Lrounds_16_xx
1802
1803	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1804	ldr	r0,[r3,#0]
1805	ldr	r2,[r3,#4]
1806	ldr	r12,[r3,#8]
1807	add	r4,r4,r0
1808	ldr	r0,[r3,#12]
1809	add	r5,r5,r2
1810	ldr	r2,[r3,#16]
1811	add	r6,r6,r12
1812	ldr	r12,[r3,#20]
1813	add	r7,r7,r0
1814	ldr	r0,[r3,#24]
1815	add	r8,r8,r2
1816	ldr	r2,[r3,#28]
1817	add	r9,r9,r12
1818	ldr	r1,[sp,#17*4]		@ pull inp
1819	ldr	r12,[sp,#18*4]		@ pull inp+len
1820	add	r10,r10,r0
1821	add	r11,r11,r2
1822	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1823	cmp	r1,r12
1824	sub	r14,r14,#256	@ rewind Ktbl
1825	bne	.Loop
1826
1827	add	sp,sp,#19*4	@ destroy frame
1828#if __ARM_ARCH__>=5
1829	ldmia	sp!,{r4-r11,pc}
1830#else
1831	ldmia	sp!,{r4-r11,lr}
1832	tst	lr,#1
1833	moveq	pc,lr			@ be binary compatible with V4, yet
1834	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1835#endif
1836.size	sha256_block_data_order,.-sha256_block_data_order
1837#if __ARM_MAX_ARCH__>=7
1838.arch	armv7-a
1839.fpu	neon
1840
1841.global	sha256_block_data_order_neon
1842.type	sha256_block_data_order_neon,%function
1843.align	4
1844sha256_block_data_order_neon:
1845.LNEON:
1846	stmdb	sp!,{r4-r12,lr}
1847
1848	sub	r11,sp,#16*4+16
1849	adr	r14,K256
1850	bic	r11,r11,#15		@ align for 128-bit stores
1851	mov	r12,sp
1852	mov	sp,r11			@ alloca
1853	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1854
1855	vld1.8		{q0},[r1]!
1856	vld1.8		{q1},[r1]!
1857	vld1.8		{q2},[r1]!
1858	vld1.8		{q3},[r1]!
1859	vld1.32		{q8},[r14,:128]!
1860	vld1.32		{q9},[r14,:128]!
1861	vld1.32		{q10},[r14,:128]!
1862	vld1.32		{q11},[r14,:128]!
1863	vrev32.8	q0,q0		@ yes, even on
1864	str		r0,[sp,#64]
1865	vrev32.8	q1,q1		@ big-endian
1866	str		r1,[sp,#68]
1867	mov		r1,sp
1868	vrev32.8	q2,q2
1869	str		r2,[sp,#72]
1870	vrev32.8	q3,q3
1871	str		r12,[sp,#76]		@ save original sp
1872	vadd.i32	q8,q8,q0
1873	vadd.i32	q9,q9,q1
1874	vst1.32		{q8},[r1,:128]!
1875	vadd.i32	q10,q10,q2
1876	vst1.32		{q9},[r1,:128]!
1877	vadd.i32	q11,q11,q3
1878	vst1.32		{q10},[r1,:128]!
1879	vst1.32		{q11},[r1,:128]!
1880
1881	ldmia		r0,{r4-r11}
1882	sub		r1,r1,#64
1883	ldr		r2,[sp,#0]
1884	eor		r12,r12,r12
1885	eor		r3,r5,r6
1886	b		.L_00_48
1887
1888.align	4
1889.L_00_48:
1890	vext.8	q8,q0,q1,#4
1891	add	r11,r11,r2
1892	eor	r2,r9,r10
1893	eor	r0,r8,r8,ror#5
1894	vext.8	q9,q2,q3,#4
1895	add	r4,r4,r12
1896	and	r2,r2,r8
1897	eor	r12,r0,r8,ror#19
1898	vshr.u32	q10,q8,#7
1899	eor	r0,r4,r4,ror#11
1900	eor	r2,r2,r10
1901	vadd.i32	q0,q0,q9
1902	add	r11,r11,r12,ror#6
1903	eor	r12,r4,r5
1904	vshr.u32	q9,q8,#3
1905	eor	r0,r0,r4,ror#20
1906	add	r11,r11,r2
1907	vsli.32	q10,q8,#25
1908	ldr	r2,[sp,#4]
1909	and	r3,r3,r12
1910	vshr.u32	q11,q8,#18
1911	add	r7,r7,r11
1912	add	r11,r11,r0,ror#2
1913	eor	r3,r3,r5
1914	veor	q9,q9,q10
1915	add	r10,r10,r2
1916	vsli.32	q11,q8,#14
1917	eor	r2,r8,r9
1918	eor	r0,r7,r7,ror#5
1919	vshr.u32	d24,d7,#17
1920	add	r11,r11,r3
1921	and	r2,r2,r7
1922	veor	q9,q9,q11
1923	eor	r3,r0,r7,ror#19
1924	eor	r0,r11,r11,ror#11
1925	vsli.32	d24,d7,#15
1926	eor	r2,r2,r9
1927	add	r10,r10,r3,ror#6
1928	vshr.u32	d25,d7,#10
1929	eor	r3,r11,r4
1930	eor	r0,r0,r11,ror#20
1931	vadd.i32	q0,q0,q9
1932	add	r10,r10,r2
1933	ldr	r2,[sp,#8]
1934	veor	d25,d25,d24
1935	and	r12,r12,r3
1936	add	r6,r6,r10
1937	vshr.u32	d24,d7,#19
1938	add	r10,r10,r0,ror#2
1939	eor	r12,r12,r4
1940	vsli.32	d24,d7,#13
1941	add	r9,r9,r2
1942	eor	r2,r7,r8
1943	veor	d25,d25,d24
1944	eor	r0,r6,r6,ror#5
1945	add	r10,r10,r12
1946	vadd.i32	d0,d0,d25
1947	and	r2,r2,r6
1948	eor	r12,r0,r6,ror#19
1949	vshr.u32	d24,d0,#17
1950	eor	r0,r10,r10,ror#11
1951	eor	r2,r2,r8
1952	vsli.32	d24,d0,#15
1953	add	r9,r9,r12,ror#6
1954	eor	r12,r10,r11
1955	vshr.u32	d25,d0,#10
1956	eor	r0,r0,r10,ror#20
1957	add	r9,r9,r2
1958	veor	d25,d25,d24
1959	ldr	r2,[sp,#12]
1960	and	r3,r3,r12
1961	vshr.u32	d24,d0,#19
1962	add	r5,r5,r9
1963	add	r9,r9,r0,ror#2
1964	eor	r3,r3,r11
1965	vld1.32	{q8},[r14,:128]!
1966	add	r8,r8,r2
1967	vsli.32	d24,d0,#13
1968	eor	r2,r6,r7
1969	eor	r0,r5,r5,ror#5
1970	veor	d25,d25,d24
1971	add	r9,r9,r3
1972	and	r2,r2,r5
1973	vadd.i32	d1,d1,d25
1974	eor	r3,r0,r5,ror#19
1975	eor	r0,r9,r9,ror#11
1976	vadd.i32	q8,q8,q0
1977	eor	r2,r2,r7
1978	add	r8,r8,r3,ror#6
1979	eor	r3,r9,r10
1980	eor	r0,r0,r9,ror#20
1981	add	r8,r8,r2
1982	ldr	r2,[sp,#16]
1983	and	r12,r12,r3
1984	add	r4,r4,r8
1985	vst1.32	{q8},[r1,:128]!
1986	add	r8,r8,r0,ror#2
1987	eor	r12,r12,r10
1988	vext.8	q8,q1,q2,#4
1989	add	r7,r7,r2
1990	eor	r2,r5,r6
1991	eor	r0,r4,r4,ror#5
1992	vext.8	q9,q3,q0,#4
1993	add	r8,r8,r12
1994	and	r2,r2,r4
1995	eor	r12,r0,r4,ror#19
1996	vshr.u32	q10,q8,#7
1997	eor	r0,r8,r8,ror#11
1998	eor	r2,r2,r6
1999	vadd.i32	q1,q1,q9
2000	add	r7,r7,r12,ror#6
2001	eor	r12,r8,r9
2002	vshr.u32	q9,q8,#3
2003	eor	r0,r0,r8,ror#20
2004	add	r7,r7,r2
2005	vsli.32	q10,q8,#25
2006	ldr	r2,[sp,#20]
2007	and	r3,r3,r12
2008	vshr.u32	q11,q8,#18
2009	add	r11,r11,r7
2010	add	r7,r7,r0,ror#2
2011	eor	r3,r3,r9
2012	veor	q9,q9,q10
2013	add	r6,r6,r2
2014	vsli.32	q11,q8,#14
2015	eor	r2,r4,r5
2016	eor	r0,r11,r11,ror#5
2017	vshr.u32	d24,d1,#17
2018	add	r7,r7,r3
2019	and	r2,r2,r11
2020	veor	q9,q9,q11
2021	eor	r3,r0,r11,ror#19
2022	eor	r0,r7,r7,ror#11
2023	vsli.32	d24,d1,#15
2024	eor	r2,r2,r5
2025	add	r6,r6,r3,ror#6
2026	vshr.u32	d25,d1,#10
2027	eor	r3,r7,r8
2028	eor	r0,r0,r7,ror#20
2029	vadd.i32	q1,q1,q9
2030	add	r6,r6,r2
2031	ldr	r2,[sp,#24]
2032	veor	d25,d25,d24
2033	and	r12,r12,r3
2034	add	r10,r10,r6
2035	vshr.u32	d24,d1,#19
2036	add	r6,r6,r0,ror#2
2037	eor	r12,r12,r8
2038	vsli.32	d24,d1,#13
2039	add	r5,r5,r2
2040	eor	r2,r11,r4
2041	veor	d25,d25,d24
2042	eor	r0,r10,r10,ror#5
2043	add	r6,r6,r12
2044	vadd.i32	d2,d2,d25
2045	and	r2,r2,r10
2046	eor	r12,r0,r10,ror#19
2047	vshr.u32	d24,d2,#17
2048	eor	r0,r6,r6,ror#11
2049	eor	r2,r2,r4
2050	vsli.32	d24,d2,#15
2051	add	r5,r5,r12,ror#6
2052	eor	r12,r6,r7
2053	vshr.u32	d25,d2,#10
2054	eor	r0,r0,r6,ror#20
2055	add	r5,r5,r2
2056	veor	d25,d25,d24
2057	ldr	r2,[sp,#28]
2058	and	r3,r3,r12
2059	vshr.u32	d24,d2,#19
2060	add	r9,r9,r5
2061	add	r5,r5,r0,ror#2
2062	eor	r3,r3,r7
2063	vld1.32	{q8},[r14,:128]!
2064	add	r4,r4,r2
2065	vsli.32	d24,d2,#13
2066	eor	r2,r10,r11
2067	eor	r0,r9,r9,ror#5
2068	veor	d25,d25,d24
2069	add	r5,r5,r3
2070	and	r2,r2,r9
2071	vadd.i32	d3,d3,d25
2072	eor	r3,r0,r9,ror#19
2073	eor	r0,r5,r5,ror#11
2074	vadd.i32	q8,q8,q1
2075	eor	r2,r2,r11
2076	add	r4,r4,r3,ror#6
2077	eor	r3,r5,r6
2078	eor	r0,r0,r5,ror#20
2079	add	r4,r4,r2
2080	ldr	r2,[sp,#32]
2081	and	r12,r12,r3
2082	add	r8,r8,r4
2083	vst1.32	{q8},[r1,:128]!
2084	add	r4,r4,r0,ror#2
2085	eor	r12,r12,r6
2086	vext.8	q8,q2,q3,#4
2087	add	r11,r11,r2
2088	eor	r2,r9,r10
2089	eor	r0,r8,r8,ror#5
2090	vext.8	q9,q0,q1,#4
2091	add	r4,r4,r12
2092	and	r2,r2,r8
2093	eor	r12,r0,r8,ror#19
2094	vshr.u32	q10,q8,#7
2095	eor	r0,r4,r4,ror#11
2096	eor	r2,r2,r10
2097	vadd.i32	q2,q2,q9
2098	add	r11,r11,r12,ror#6
2099	eor	r12,r4,r5
2100	vshr.u32	q9,q8,#3
2101	eor	r0,r0,r4,ror#20
2102	add	r11,r11,r2
2103	vsli.32	q10,q8,#25
2104	ldr	r2,[sp,#36]
2105	and	r3,r3,r12
2106	vshr.u32	q11,q8,#18
2107	add	r7,r7,r11
2108	add	r11,r11,r0,ror#2
2109	eor	r3,r3,r5
2110	veor	q9,q9,q10
2111	add	r10,r10,r2
2112	vsli.32	q11,q8,#14
2113	eor	r2,r8,r9
2114	eor	r0,r7,r7,ror#5
2115	vshr.u32	d24,d3,#17
2116	add	r11,r11,r3
2117	and	r2,r2,r7
2118	veor	q9,q9,q11
2119	eor	r3,r0,r7,ror#19
2120	eor	r0,r11,r11,ror#11
2121	vsli.32	d24,d3,#15
2122	eor	r2,r2,r9
2123	add	r10,r10,r3,ror#6
2124	vshr.u32	d25,d3,#10
2125	eor	r3,r11,r4
2126	eor	r0,r0,r11,ror#20
2127	vadd.i32	q2,q2,q9
2128	add	r10,r10,r2
2129	ldr	r2,[sp,#40]
2130	veor	d25,d25,d24
2131	and	r12,r12,r3
2132	add	r6,r6,r10
2133	vshr.u32	d24,d3,#19
2134	add	r10,r10,r0,ror#2
2135	eor	r12,r12,r4
2136	vsli.32	d24,d3,#13
2137	add	r9,r9,r2
2138	eor	r2,r7,r8
2139	veor	d25,d25,d24
2140	eor	r0,r6,r6,ror#5
2141	add	r10,r10,r12
2142	vadd.i32	d4,d4,d25
2143	and	r2,r2,r6
2144	eor	r12,r0,r6,ror#19
2145	vshr.u32	d24,d4,#17
2146	eor	r0,r10,r10,ror#11
2147	eor	r2,r2,r8
2148	vsli.32	d24,d4,#15
2149	add	r9,r9,r12,ror#6
2150	eor	r12,r10,r11
2151	vshr.u32	d25,d4,#10
2152	eor	r0,r0,r10,ror#20
2153	add	r9,r9,r2
2154	veor	d25,d25,d24
2155	ldr	r2,[sp,#44]
2156	and	r3,r3,r12
2157	vshr.u32	d24,d4,#19
2158	add	r5,r5,r9
2159	add	r9,r9,r0,ror#2
2160	eor	r3,r3,r11
2161	vld1.32	{q8},[r14,:128]!
2162	add	r8,r8,r2
2163	vsli.32	d24,d4,#13
2164	eor	r2,r6,r7
2165	eor	r0,r5,r5,ror#5
2166	veor	d25,d25,d24
2167	add	r9,r9,r3
2168	and	r2,r2,r5
2169	vadd.i32	d5,d5,d25
2170	eor	r3,r0,r5,ror#19
2171	eor	r0,r9,r9,ror#11
2172	vadd.i32	q8,q8,q2
2173	eor	r2,r2,r7
2174	add	r8,r8,r3,ror#6
2175	eor	r3,r9,r10
2176	eor	r0,r0,r9,ror#20
2177	add	r8,r8,r2
2178	ldr	r2,[sp,#48]
2179	and	r12,r12,r3
2180	add	r4,r4,r8
2181	vst1.32	{q8},[r1,:128]!
2182	add	r8,r8,r0,ror#2
2183	eor	r12,r12,r10
2184	vext.8	q8,q3,q0,#4
2185	add	r7,r7,r2
2186	eor	r2,r5,r6
2187	eor	r0,r4,r4,ror#5
2188	vext.8	q9,q1,q2,#4
2189	add	r8,r8,r12
2190	and	r2,r2,r4
2191	eor	r12,r0,r4,ror#19
2192	vshr.u32	q10,q8,#7
2193	eor	r0,r8,r8,ror#11
2194	eor	r2,r2,r6
2195	vadd.i32	q3,q3,q9
2196	add	r7,r7,r12,ror#6
2197	eor	r12,r8,r9
2198	vshr.u32	q9,q8,#3
2199	eor	r0,r0,r8,ror#20
2200	add	r7,r7,r2
2201	vsli.32	q10,q8,#25
2202	ldr	r2,[sp,#52]
2203	and	r3,r3,r12
2204	vshr.u32	q11,q8,#18
2205	add	r11,r11,r7
2206	add	r7,r7,r0,ror#2
2207	eor	r3,r3,r9
2208	veor	q9,q9,q10
2209	add	r6,r6,r2
2210	vsli.32	q11,q8,#14
2211	eor	r2,r4,r5
2212	eor	r0,r11,r11,ror#5
2213	vshr.u32	d24,d5,#17
2214	add	r7,r7,r3
2215	and	r2,r2,r11
2216	veor	q9,q9,q11
2217	eor	r3,r0,r11,ror#19
2218	eor	r0,r7,r7,ror#11
2219	vsli.32	d24,d5,#15
2220	eor	r2,r2,r5
2221	add	r6,r6,r3,ror#6
2222	vshr.u32	d25,d5,#10
2223	eor	r3,r7,r8
2224	eor	r0,r0,r7,ror#20
2225	vadd.i32	q3,q3,q9
2226	add	r6,r6,r2
2227	ldr	r2,[sp,#56]
2228	veor	d25,d25,d24
2229	and	r12,r12,r3
2230	add	r10,r10,r6
2231	vshr.u32	d24,d5,#19
2232	add	r6,r6,r0,ror#2
2233	eor	r12,r12,r8
2234	vsli.32	d24,d5,#13
2235	add	r5,r5,r2
2236	eor	r2,r11,r4
2237	veor	d25,d25,d24
2238	eor	r0,r10,r10,ror#5
2239	add	r6,r6,r12
2240	vadd.i32	d6,d6,d25
2241	and	r2,r2,r10
2242	eor	r12,r0,r10,ror#19
2243	vshr.u32	d24,d6,#17
2244	eor	r0,r6,r6,ror#11
2245	eor	r2,r2,r4
2246	vsli.32	d24,d6,#15
2247	add	r5,r5,r12,ror#6
2248	eor	r12,r6,r7
2249	vshr.u32	d25,d6,#10
2250	eor	r0,r0,r6,ror#20
2251	add	r5,r5,r2
2252	veor	d25,d25,d24
2253	ldr	r2,[sp,#60]
2254	and	r3,r3,r12
2255	vshr.u32	d24,d6,#19
2256	add	r9,r9,r5
2257	add	r5,r5,r0,ror#2
2258	eor	r3,r3,r7
2259	vld1.32	{q8},[r14,:128]!
2260	add	r4,r4,r2
2261	vsli.32	d24,d6,#13
2262	eor	r2,r10,r11
2263	eor	r0,r9,r9,ror#5
2264	veor	d25,d25,d24
2265	add	r5,r5,r3
2266	and	r2,r2,r9
2267	vadd.i32	d7,d7,d25
2268	eor	r3,r0,r9,ror#19
2269	eor	r0,r5,r5,ror#11
2270	vadd.i32	q8,q8,q3
2271	eor	r2,r2,r11
2272	add	r4,r4,r3,ror#6
2273	eor	r3,r5,r6
2274	eor	r0,r0,r5,ror#20
2275	add	r4,r4,r2
2276	ldr	r2,[r14]
2277	and	r12,r12,r3
2278	add	r8,r8,r4
2279	vst1.32	{q8},[r1,:128]!
2280	add	r4,r4,r0,ror#2
2281	eor	r12,r12,r6
2282	teq	r2,#0				@ check for K256 terminator
2283	ldr	r2,[sp,#0]
2284	sub	r1,r1,#64
2285	bne	.L_00_48
2286
2287	ldr		r1,[sp,#68]
2288	ldr		r0,[sp,#72]
2289	sub		r14,r14,#256	@ rewind r14
2290	teq		r1,r0
2291	it		eq
2292	subeq		r1,r1,#64		@ avoid SEGV
2293	vld1.8		{q0},[r1]!		@ load next input block
2294	vld1.8		{q1},[r1]!
2295	vld1.8		{q2},[r1]!
2296	vld1.8		{q3},[r1]!
2297	it		ne
2298	strne		r1,[sp,#68]
2299	mov		r1,sp
2300	add	r11,r11,r2
2301	eor	r2,r9,r10
2302	eor	r0,r8,r8,ror#5
2303	add	r4,r4,r12
2304	vld1.32	{q8},[r14,:128]!
2305	and	r2,r2,r8
2306	eor	r12,r0,r8,ror#19
2307	eor	r0,r4,r4,ror#11
2308	eor	r2,r2,r10
2309	vrev32.8	q0,q0
2310	add	r11,r11,r12,ror#6
2311	eor	r12,r4,r5
2312	eor	r0,r0,r4,ror#20
2313	add	r11,r11,r2
2314	vadd.i32	q8,q8,q0
2315	ldr	r2,[sp,#4]
2316	and	r3,r3,r12
2317	add	r7,r7,r11
2318	add	r11,r11,r0,ror#2
2319	eor	r3,r3,r5
2320	add	r10,r10,r2
2321	eor	r2,r8,r9
2322	eor	r0,r7,r7,ror#5
2323	add	r11,r11,r3
2324	and	r2,r2,r7
2325	eor	r3,r0,r7,ror#19
2326	eor	r0,r11,r11,ror#11
2327	eor	r2,r2,r9
2328	add	r10,r10,r3,ror#6
2329	eor	r3,r11,r4
2330	eor	r0,r0,r11,ror#20
2331	add	r10,r10,r2
2332	ldr	r2,[sp,#8]
2333	and	r12,r12,r3
2334	add	r6,r6,r10
2335	add	r10,r10,r0,ror#2
2336	eor	r12,r12,r4
2337	add	r9,r9,r2
2338	eor	r2,r7,r8
2339	eor	r0,r6,r6,ror#5
2340	add	r10,r10,r12
2341	and	r2,r2,r6
2342	eor	r12,r0,r6,ror#19
2343	eor	r0,r10,r10,ror#11
2344	eor	r2,r2,r8
2345	add	r9,r9,r12,ror#6
2346	eor	r12,r10,r11
2347	eor	r0,r0,r10,ror#20
2348	add	r9,r9,r2
2349	ldr	r2,[sp,#12]
2350	and	r3,r3,r12
2351	add	r5,r5,r9
2352	add	r9,r9,r0,ror#2
2353	eor	r3,r3,r11
2354	add	r8,r8,r2
2355	eor	r2,r6,r7
2356	eor	r0,r5,r5,ror#5
2357	add	r9,r9,r3
2358	and	r2,r2,r5
2359	eor	r3,r0,r5,ror#19
2360	eor	r0,r9,r9,ror#11
2361	eor	r2,r2,r7
2362	add	r8,r8,r3,ror#6
2363	eor	r3,r9,r10
2364	eor	r0,r0,r9,ror#20
2365	add	r8,r8,r2
2366	ldr	r2,[sp,#16]
2367	and	r12,r12,r3
2368	add	r4,r4,r8
2369	add	r8,r8,r0,ror#2
2370	eor	r12,r12,r10
2371	vst1.32	{q8},[r1,:128]!
2372	add	r7,r7,r2
2373	eor	r2,r5,r6
2374	eor	r0,r4,r4,ror#5
2375	add	r8,r8,r12
2376	vld1.32	{q8},[r14,:128]!
2377	and	r2,r2,r4
2378	eor	r12,r0,r4,ror#19
2379	eor	r0,r8,r8,ror#11
2380	eor	r2,r2,r6
2381	vrev32.8	q1,q1
2382	add	r7,r7,r12,ror#6
2383	eor	r12,r8,r9
2384	eor	r0,r0,r8,ror#20
2385	add	r7,r7,r2
2386	vadd.i32	q8,q8,q1
2387	ldr	r2,[sp,#20]
2388	and	r3,r3,r12
2389	add	r11,r11,r7
2390	add	r7,r7,r0,ror#2
2391	eor	r3,r3,r9
2392	add	r6,r6,r2
2393	eor	r2,r4,r5
2394	eor	r0,r11,r11,ror#5
2395	add	r7,r7,r3
2396	and	r2,r2,r11
2397	eor	r3,r0,r11,ror#19
2398	eor	r0,r7,r7,ror#11
2399	eor	r2,r2,r5
2400	add	r6,r6,r3,ror#6
2401	eor	r3,r7,r8
2402	eor	r0,r0,r7,ror#20
2403	add	r6,r6,r2
2404	ldr	r2,[sp,#24]
2405	and	r12,r12,r3
2406	add	r10,r10,r6
2407	add	r6,r6,r0,ror#2
2408	eor	r12,r12,r8
2409	add	r5,r5,r2
2410	eor	r2,r11,r4
2411	eor	r0,r10,r10,ror#5
2412	add	r6,r6,r12
2413	and	r2,r2,r10
2414	eor	r12,r0,r10,ror#19
2415	eor	r0,r6,r6,ror#11
2416	eor	r2,r2,r4
2417	add	r5,r5,r12,ror#6
2418	eor	r12,r6,r7
2419	eor	r0,r0,r6,ror#20
2420	add	r5,r5,r2
2421	ldr	r2,[sp,#28]
2422	and	r3,r3,r12
2423	add	r9,r9,r5
2424	add	r5,r5,r0,ror#2
2425	eor	r3,r3,r7
2426	add	r4,r4,r2
2427	eor	r2,r10,r11
2428	eor	r0,r9,r9,ror#5
2429	add	r5,r5,r3
2430	and	r2,r2,r9
2431	eor	r3,r0,r9,ror#19
2432	eor	r0,r5,r5,ror#11
2433	eor	r2,r2,r11
2434	add	r4,r4,r3,ror#6
2435	eor	r3,r5,r6
2436	eor	r0,r0,r5,ror#20
2437	add	r4,r4,r2
2438	ldr	r2,[sp,#32]
2439	and	r12,r12,r3
2440	add	r8,r8,r4
2441	add	r4,r4,r0,ror#2
2442	eor	r12,r12,r6
2443	vst1.32	{q8},[r1,:128]!
2444	add	r11,r11,r2
2445	eor	r2,r9,r10
2446	eor	r0,r8,r8,ror#5
2447	add	r4,r4,r12
2448	vld1.32	{q8},[r14,:128]!
2449	and	r2,r2,r8
2450	eor	r12,r0,r8,ror#19
2451	eor	r0,r4,r4,ror#11
2452	eor	r2,r2,r10
2453	vrev32.8	q2,q2
2454	add	r11,r11,r12,ror#6
2455	eor	r12,r4,r5
2456	eor	r0,r0,r4,ror#20
2457	add	r11,r11,r2
2458	vadd.i32	q8,q8,q2
2459	ldr	r2,[sp,#36]
2460	and	r3,r3,r12
2461	add	r7,r7,r11
2462	add	r11,r11,r0,ror#2
2463	eor	r3,r3,r5
2464	add	r10,r10,r2
2465	eor	r2,r8,r9
2466	eor	r0,r7,r7,ror#5
2467	add	r11,r11,r3
2468	and	r2,r2,r7
2469	eor	r3,r0,r7,ror#19
2470	eor	r0,r11,r11,ror#11
2471	eor	r2,r2,r9
2472	add	r10,r10,r3,ror#6
2473	eor	r3,r11,r4
2474	eor	r0,r0,r11,ror#20
2475	add	r10,r10,r2
2476	ldr	r2,[sp,#40]
2477	and	r12,r12,r3
2478	add	r6,r6,r10
2479	add	r10,r10,r0,ror#2
2480	eor	r12,r12,r4
2481	add	r9,r9,r2
2482	eor	r2,r7,r8
2483	eor	r0,r6,r6,ror#5
2484	add	r10,r10,r12
2485	and	r2,r2,r6
2486	eor	r12,r0,r6,ror#19
2487	eor	r0,r10,r10,ror#11
2488	eor	r2,r2,r8
2489	add	r9,r9,r12,ror#6
2490	eor	r12,r10,r11
2491	eor	r0,r0,r10,ror#20
2492	add	r9,r9,r2
2493	ldr	r2,[sp,#44]
2494	and	r3,r3,r12
2495	add	r5,r5,r9
2496	add	r9,r9,r0,ror#2
2497	eor	r3,r3,r11
2498	add	r8,r8,r2
2499	eor	r2,r6,r7
2500	eor	r0,r5,r5,ror#5
2501	add	r9,r9,r3
2502	and	r2,r2,r5
2503	eor	r3,r0,r5,ror#19
2504	eor	r0,r9,r9,ror#11
2505	eor	r2,r2,r7
2506	add	r8,r8,r3,ror#6
2507	eor	r3,r9,r10
2508	eor	r0,r0,r9,ror#20
2509	add	r8,r8,r2
2510	ldr	r2,[sp,#48]
2511	and	r12,r12,r3
2512	add	r4,r4,r8
2513	add	r8,r8,r0,ror#2
2514	eor	r12,r12,r10
2515	vst1.32	{q8},[r1,:128]!
2516	add	r7,r7,r2
2517	eor	r2,r5,r6
2518	eor	r0,r4,r4,ror#5
2519	add	r8,r8,r12
2520	vld1.32	{q8},[r14,:128]!
2521	and	r2,r2,r4
2522	eor	r12,r0,r4,ror#19
2523	eor	r0,r8,r8,ror#11
2524	eor	r2,r2,r6
2525	vrev32.8	q3,q3
2526	add	r7,r7,r12,ror#6
2527	eor	r12,r8,r9
2528	eor	r0,r0,r8,ror#20
2529	add	r7,r7,r2
2530	vadd.i32	q8,q8,q3
2531	ldr	r2,[sp,#52]
2532	and	r3,r3,r12
2533	add	r11,r11,r7
2534	add	r7,r7,r0,ror#2
2535	eor	r3,r3,r9
2536	add	r6,r6,r2
2537	eor	r2,r4,r5
2538	eor	r0,r11,r11,ror#5
2539	add	r7,r7,r3
2540	and	r2,r2,r11
2541	eor	r3,r0,r11,ror#19
2542	eor	r0,r7,r7,ror#11
2543	eor	r2,r2,r5
2544	add	r6,r6,r3,ror#6
2545	eor	r3,r7,r8
2546	eor	r0,r0,r7,ror#20
2547	add	r6,r6,r2
2548	ldr	r2,[sp,#56]
2549	and	r12,r12,r3
2550	add	r10,r10,r6
2551	add	r6,r6,r0,ror#2
2552	eor	r12,r12,r8
2553	add	r5,r5,r2
2554	eor	r2,r11,r4
2555	eor	r0,r10,r10,ror#5
2556	add	r6,r6,r12
2557	and	r2,r2,r10
2558	eor	r12,r0,r10,ror#19
2559	eor	r0,r6,r6,ror#11
2560	eor	r2,r2,r4
2561	add	r5,r5,r12,ror#6
2562	eor	r12,r6,r7
2563	eor	r0,r0,r6,ror#20
2564	add	r5,r5,r2
2565	ldr	r2,[sp,#60]
2566	and	r3,r3,r12
2567	add	r9,r9,r5
2568	add	r5,r5,r0,ror#2
2569	eor	r3,r3,r7
2570	add	r4,r4,r2
2571	eor	r2,r10,r11
2572	eor	r0,r9,r9,ror#5
2573	add	r5,r5,r3
2574	and	r2,r2,r9
2575	eor	r3,r0,r9,ror#19
2576	eor	r0,r5,r5,ror#11
2577	eor	r2,r2,r11
2578	add	r4,r4,r3,ror#6
2579	eor	r3,r5,r6
2580	eor	r0,r0,r5,ror#20
2581	add	r4,r4,r2
2582	ldr	r2,[sp,#64]
2583	and	r12,r12,r3
2584	add	r8,r8,r4
2585	add	r4,r4,r0,ror#2
2586	eor	r12,r12,r6
2587	vst1.32	{q8},[r1,:128]!
2588	ldr	r0,[r2,#0]
2589	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2590	ldr	r12,[r2,#4]
2591	ldr	r3,[r2,#8]
2592	ldr	r1,[r2,#12]
2593	add	r4,r4,r0			@ accumulate
2594	ldr	r0,[r2,#16]
2595	add	r5,r5,r12
2596	ldr	r12,[r2,#20]
2597	add	r6,r6,r3
2598	ldr	r3,[r2,#24]
2599	add	r7,r7,r1
2600	ldr	r1,[r2,#28]
2601	add	r8,r8,r0
2602	str	r4,[r2],#4
2603	add	r9,r9,r12
2604	str	r5,[r2],#4
2605	add	r10,r10,r3
2606	str	r6,[r2],#4
2607	add	r11,r11,r1
2608	str	r7,[r2],#4
2609	stmia	r2,{r8-r11}
2610
2611	ittte	ne
2612	movne	r1,sp
2613	ldrne	r2,[sp,#0]
2614	eorne	r12,r12,r12
2615	ldreq	sp,[sp,#76]			@ restore original sp
2616	itt	ne
2617	eorne	r3,r5,r6
2618	bne	.L_00_48
2619
2620	ldmia	sp!,{r4-r12,pc}
2621.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2622#endif
2623	#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2624	
@ INST(a,b,c,d) emits the four opcode bytes of an ARMv8 Cryptography
@ Extension instruction directly, so this file assembles even with
@ toolchains whose assembler does not know the sha256* mnemonics.
@ In Thumb-2 the two halfwords are stored in swapped order and the
@ encoding differs slightly (hence the d|0xc adjustment); in ARM mode
@ the bytes are emitted in instruction order.
2625	# ifdef __thumb2__
2626	#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2627	# else
2628	#  define INST(a,b,c,d)	.byte	a,b,c,d
2629	# endif
2630	
@ sha256_block_data_order_armv8(u32 state[8], const void *inp, size_t num)
@
@ SHA-256 block function using the ARMv8 Cryptography Extensions
@ (sha256h / sha256h2 / sha256su0 / sha256su1, emitted via INST above).
@ Register use on entry:
@   r0 = pointer to the eight 32-bit state words (loaded into q0,q1)
@   r1 = input data, consumed in 64-byte blocks
@   r2 = number of blocks (converted below into an end-of-input pointer)
@   r3 = presumably points 256+32 bytes past K256, set up by the common
@        dispatch code before branching here — not visible in this view;
@        it is rewound to the K256 constant table below.
2631	.type	sha256_block_data_order_armv8,%function
2632	.align	5
2633	sha256_block_data_order_armv8:
2634	.LARMv8:
2635	vld1.32	{q0,q1},[r0]
@ Point r3 at the K256 round-constant table.  Under Thumb-2 the address
@ is recomputed pc-relatively from .LARMv8; in ARM mode r3 is simply
@ backed up by the fixed distance (256 bytes of constants + 32).
2636	# ifdef __thumb2__
2637	adr	r3,.LARMv8
2638	sub	r3,r3,#.LARMv8-K256
2639	# else
2640	sub	r3,r3,#256+32
2641	# endif
2642	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
2643	
@ Main loop: each iteration hashes one 64-byte block.
@   q0,q1   = working state,  q14,q15 = saved copy for the final add
@   q8-q11  = message schedule W[], byte-swapped to big-endian
@   q12,q13 = W[i]+K[i] round inputs, constants streamed from [r3]
@ The teq below records "more input left" in the flags; nothing between
@ it and the bne at the bottom touches the integer flags.
2644	.Loop_v8:
2645	vld1.8		{q8-q9},[r1]!
2646	vld1.8		{q10-q11},[r1]!
2647	vld1.32		{q12},[r3]!
2648	vrev32.8	q8,q8
2649	vrev32.8	q9,q9
2650	vrev32.8	q10,q10
2651	vrev32.8	q11,q11
2652	vmov		q14,q0	@ offload
2653	vmov		q15,q1
2654	teq		r1,r2
@ Rounds 0..47: twelve groups of four rounds each.  A group consumes one
@ W+K vector (sha256h/sha256h2 on a q2 copy of the state) while
@ sha256su0/sha256su1 extend the message schedule for a later group.
2655	vld1.32		{q13},[r3]!
2656	vadd.i32	q12,q12,q8
2657	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2658	vmov		q2,q0
2659	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2660	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2661	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2662	vld1.32		{q12},[r3]!
2663	vadd.i32	q13,q13,q9
2664	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2665	vmov		q2,q0
2666	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2667	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2668	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2669	vld1.32		{q13},[r3]!
2670	vadd.i32	q12,q12,q10
2671	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2672	vmov		q2,q0
2673	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2674	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2675	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2676	vld1.32		{q12},[r3]!
2677	vadd.i32	q13,q13,q11
2678	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2679	vmov		q2,q0
2680	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2681	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2682	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2683	vld1.32		{q13},[r3]!
2684	vadd.i32	q12,q12,q8
2685	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2686	vmov		q2,q0
2687	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2688	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2689	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2690	vld1.32		{q12},[r3]!
2691	vadd.i32	q13,q13,q9
2692	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2693	vmov		q2,q0
2694	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2695	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2696	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2697	vld1.32		{q13},[r3]!
2698	vadd.i32	q12,q12,q10
2699	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2700	vmov		q2,q0
2701	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2702	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2703	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2704	vld1.32		{q12},[r3]!
2705	vadd.i32	q13,q13,q11
2706	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2707	vmov		q2,q0
2708	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2709	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2710	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2711	vld1.32		{q13},[r3]!
2712	vadd.i32	q12,q12,q8
2713	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2714	vmov		q2,q0
2715	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2716	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2717	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2718	vld1.32		{q12},[r3]!
2719	vadd.i32	q13,q13,q9
2720	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2721	vmov		q2,q0
2722	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2723	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2724	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2725	vld1.32		{q13},[r3]!
2726	vadd.i32	q12,q12,q10
2727	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2728	vmov		q2,q0
2729	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2730	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2731	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2732	vld1.32		{q12},[r3]!
2733	vadd.i32	q13,q13,q11
2734	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2735	vmov		q2,q0
2736	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2737	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2738	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Rounds 48..63: final four groups — no further schedule expansion, so
@ only the compression halves (sha256h/sha256h2) remain.
2739	vld1.32		{q13},[r3]!
2740	vadd.i32	q12,q12,q8
2741	vmov		q2,q0
2742	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2743	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2744	
2745	vld1.32		{q12},[r3]!
2746	vadd.i32	q13,q13,q9
2747	vmov		q2,q0
2748	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2749	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2750	
2751	vld1.32		{q13},[r3]
2752	vadd.i32	q12,q12,q10
2753	sub		r3,r3,#256-16	@ rewind
2754	vmov		q2,q0
2755	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2756	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2757	
2758	vadd.i32	q13,q13,q11
2759	vmov		q2,q0
2760	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2761	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2762	
@ Fold this block's result into the saved state.  The flags still hold
@ the teq r1,r2 comparison from the top of the loop; the "it ne" is the
@ Thumb-2 IT block required for the conditional branch (accepted in
@ unified-syntax ARM mode as well).
2763	vadd.i32	q0,q0,q14
2764	vadd.i32	q1,q1,q15
2765	it		ne
2766	bne		.Loop_v8
2767	
@ All blocks done: write the updated hash state back to [r0] and return.
2768	vst1.32		{q0,q1},[r0]
2769	
2770	bx	lr		@ bx lr
2771	.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2772	#endif
@ Embedded identification string (kept byte-for-byte; it is emitted into
@ the object file at run-/link-time visibility).
2773	.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2774	.align	2
@ OPENSSL_armcap_P: 4-byte, 4-aligned common symbol holding the CPU
@ capability bits used for runtime dispatch between the ARMv4/NEON/ARMv8
@ code paths.  Presumably populated by OpenSSL's ARM capability probe
@ (declared via arm_arch.h) — the writer is not visible in this file.
@ Only emitted when runtime dispatch is compiled in (userland, ARMv7+).
2775	#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2776	.comm   OPENSSL_armcap_P,4,4
2777	#endif
2778