
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that latter performs sub-optimally, nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

@ Outside the kernel, arm_arch.h supplies __ARM_ARCH__/__ARM_MAX_ARCH__
@ and the OPENSSL_armcap_P capability flags; inside the kernel the
@ architecture level comes from the kbuild-provided __LINUX_ARM_ARCH__.
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32
#else
@ ARMv7+: unified syntax so the same source assembles as Thumb-2
@ (when built with __thumb2__) or as classic 32-bit ARM code.
.syntax unified
# ifdef __thumb2__
.thumb
# else
.code   32
# endif
#endif
57
@ SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2):
@ first 32 bits of the fractional parts of the cube roots of the
@ first 64 primes. Located next to the code and addressed
@ PC-relative (r14 = r3 - (256+32) in the integer path).
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Position-independent reference to the runtime CPU-capability word:
@ the stored value is the link-time offset of OPENSSL_armcap_P from
@ sha256_block_data_order; the function adds its own runtime address
@ (r3) to recover the absolute pointer without a dynamic relocation.
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
#endif
.align	5
84
85.global	sha256_block_data_order
86.type	sha256_block_data_order,%function
87sha256_block_data_order:
88#if __ARM_ARCH__<7
89	sub	r3,pc,#8		@ sha256_block_data_order
90#else
91	adr	r3,sha256_block_data_order
92#endif
93#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
94	ldr	r12,.LOPENSSL_armcap
95	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
96	tst	r12,#ARMV8_SHA256
97	bne	.LARMv8
98	tst	r12,#ARMV7_NEON
99	bne	.LNEON
100#endif
101	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
102	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
103	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
104	sub	r14,r3,#256+32	@ K256
105	sub	sp,sp,#16*4		@ alloca(X[16])
106.Loop:
107# if __ARM_ARCH__>=7
108	ldr	r2,[r1],#4
109# else
110	ldrb	r2,[r1,#3]
111# endif
112	eor	r3,r5,r6		@ magic
113	eor	r12,r12,r12
114#if __ARM_ARCH__>=7
115	@ ldr	r2,[r1],#4			@ 0
116# if 0==15
117	str	r1,[sp,#17*4]			@ make room for r1
118# endif
119	eor	r0,r8,r8,ror#5
120	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
121	eor	r0,r0,r8,ror#19	@ Sigma1(e)
122	rev	r2,r2
123#else
124	@ ldrb	r2,[r1,#3]			@ 0
125	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
126	ldrb	r12,[r1,#2]
127	ldrb	r0,[r1,#1]
128	orr	r2,r2,r12,lsl#8
129	ldrb	r12,[r1],#4
130	orr	r2,r2,r0,lsl#16
131# if 0==15
132	str	r1,[sp,#17*4]			@ make room for r1
133# endif
134	eor	r0,r8,r8,ror#5
135	orr	r2,r2,r12,lsl#24
136	eor	r0,r0,r8,ror#19	@ Sigma1(e)
137#endif
138	ldr	r12,[r14],#4			@ *K256++
139	add	r11,r11,r2			@ h+=X[i]
140	str	r2,[sp,#0*4]
141	eor	r2,r9,r10
142	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
143	and	r2,r2,r8
144	add	r11,r11,r12			@ h+=K256[i]
145	eor	r2,r2,r10			@ Ch(e,f,g)
146	eor	r0,r4,r4,ror#11
147	add	r11,r11,r2			@ h+=Ch(e,f,g)
148#if 0==31
149	and	r12,r12,#0xff
150	cmp	r12,#0xf2			@ done?
151#endif
152#if 0<15
153# if __ARM_ARCH__>=7
154	ldr	r2,[r1],#4			@ prefetch
155# else
156	ldrb	r2,[r1,#3]
157# endif
158	eor	r12,r4,r5			@ a^b, b^c in next round
159#else
160	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
161	eor	r12,r4,r5			@ a^b, b^c in next round
162	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
163#endif
164	eor	r0,r0,r4,ror#20	@ Sigma0(a)
165	and	r3,r3,r12			@ (b^c)&=(a^b)
166	add	r7,r7,r11			@ d+=h
167	eor	r3,r3,r5			@ Maj(a,b,c)
168	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
169	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
170#if __ARM_ARCH__>=7
171	@ ldr	r2,[r1],#4			@ 1
172# if 1==15
173	str	r1,[sp,#17*4]			@ make room for r1
174# endif
175	eor	r0,r7,r7,ror#5
176	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
177	eor	r0,r0,r7,ror#19	@ Sigma1(e)
178	rev	r2,r2
179#else
180	@ ldrb	r2,[r1,#3]			@ 1
181	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
182	ldrb	r3,[r1,#2]
183	ldrb	r0,[r1,#1]
184	orr	r2,r2,r3,lsl#8
185	ldrb	r3,[r1],#4
186	orr	r2,r2,r0,lsl#16
187# if 1==15
188	str	r1,[sp,#17*4]			@ make room for r1
189# endif
190	eor	r0,r7,r7,ror#5
191	orr	r2,r2,r3,lsl#24
192	eor	r0,r0,r7,ror#19	@ Sigma1(e)
193#endif
194	ldr	r3,[r14],#4			@ *K256++
195	add	r10,r10,r2			@ h+=X[i]
196	str	r2,[sp,#1*4]
197	eor	r2,r8,r9
198	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
199	and	r2,r2,r7
200	add	r10,r10,r3			@ h+=K256[i]
201	eor	r2,r2,r9			@ Ch(e,f,g)
202	eor	r0,r11,r11,ror#11
203	add	r10,r10,r2			@ h+=Ch(e,f,g)
204#if 1==31
205	and	r3,r3,#0xff
206	cmp	r3,#0xf2			@ done?
207#endif
208#if 1<15
209# if __ARM_ARCH__>=7
210	ldr	r2,[r1],#4			@ prefetch
211# else
212	ldrb	r2,[r1,#3]
213# endif
214	eor	r3,r11,r4			@ a^b, b^c in next round
215#else
216	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
217	eor	r3,r11,r4			@ a^b, b^c in next round
218	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
219#endif
220	eor	r0,r0,r11,ror#20	@ Sigma0(a)
221	and	r12,r12,r3			@ (b^c)&=(a^b)
222	add	r6,r6,r10			@ d+=h
223	eor	r12,r12,r4			@ Maj(a,b,c)
224	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
225	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
226#if __ARM_ARCH__>=7
227	@ ldr	r2,[r1],#4			@ 2
228# if 2==15
229	str	r1,[sp,#17*4]			@ make room for r1
230# endif
231	eor	r0,r6,r6,ror#5
232	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
233	eor	r0,r0,r6,ror#19	@ Sigma1(e)
234	rev	r2,r2
235#else
236	@ ldrb	r2,[r1,#3]			@ 2
237	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
238	ldrb	r12,[r1,#2]
239	ldrb	r0,[r1,#1]
240	orr	r2,r2,r12,lsl#8
241	ldrb	r12,[r1],#4
242	orr	r2,r2,r0,lsl#16
243# if 2==15
244	str	r1,[sp,#17*4]			@ make room for r1
245# endif
246	eor	r0,r6,r6,ror#5
247	orr	r2,r2,r12,lsl#24
248	eor	r0,r0,r6,ror#19	@ Sigma1(e)
249#endif
250	ldr	r12,[r14],#4			@ *K256++
251	add	r9,r9,r2			@ h+=X[i]
252	str	r2,[sp,#2*4]
253	eor	r2,r7,r8
254	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
255	and	r2,r2,r6
256	add	r9,r9,r12			@ h+=K256[i]
257	eor	r2,r2,r8			@ Ch(e,f,g)
258	eor	r0,r10,r10,ror#11
259	add	r9,r9,r2			@ h+=Ch(e,f,g)
260#if 2==31
261	and	r12,r12,#0xff
262	cmp	r12,#0xf2			@ done?
263#endif
264#if 2<15
265# if __ARM_ARCH__>=7
266	ldr	r2,[r1],#4			@ prefetch
267# else
268	ldrb	r2,[r1,#3]
269# endif
270	eor	r12,r10,r11			@ a^b, b^c in next round
271#else
272	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
273	eor	r12,r10,r11			@ a^b, b^c in next round
274	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
275#endif
276	eor	r0,r0,r10,ror#20	@ Sigma0(a)
277	and	r3,r3,r12			@ (b^c)&=(a^b)
278	add	r5,r5,r9			@ d+=h
279	eor	r3,r3,r11			@ Maj(a,b,c)
280	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
281	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
282#if __ARM_ARCH__>=7
283	@ ldr	r2,[r1],#4			@ 3
284# if 3==15
285	str	r1,[sp,#17*4]			@ make room for r1
286# endif
287	eor	r0,r5,r5,ror#5
288	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
289	eor	r0,r0,r5,ror#19	@ Sigma1(e)
290	rev	r2,r2
291#else
292	@ ldrb	r2,[r1,#3]			@ 3
293	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
294	ldrb	r3,[r1,#2]
295	ldrb	r0,[r1,#1]
296	orr	r2,r2,r3,lsl#8
297	ldrb	r3,[r1],#4
298	orr	r2,r2,r0,lsl#16
299# if 3==15
300	str	r1,[sp,#17*4]			@ make room for r1
301# endif
302	eor	r0,r5,r5,ror#5
303	orr	r2,r2,r3,lsl#24
304	eor	r0,r0,r5,ror#19	@ Sigma1(e)
305#endif
306	ldr	r3,[r14],#4			@ *K256++
307	add	r8,r8,r2			@ h+=X[i]
308	str	r2,[sp,#3*4]
309	eor	r2,r6,r7
310	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
311	and	r2,r2,r5
312	add	r8,r8,r3			@ h+=K256[i]
313	eor	r2,r2,r7			@ Ch(e,f,g)
314	eor	r0,r9,r9,ror#11
315	add	r8,r8,r2			@ h+=Ch(e,f,g)
316#if 3==31
317	and	r3,r3,#0xff
318	cmp	r3,#0xf2			@ done?
319#endif
320#if 3<15
321# if __ARM_ARCH__>=7
322	ldr	r2,[r1],#4			@ prefetch
323# else
324	ldrb	r2,[r1,#3]
325# endif
326	eor	r3,r9,r10			@ a^b, b^c in next round
327#else
328	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
329	eor	r3,r9,r10			@ a^b, b^c in next round
330	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
331#endif
332	eor	r0,r0,r9,ror#20	@ Sigma0(a)
333	and	r12,r12,r3			@ (b^c)&=(a^b)
334	add	r4,r4,r8			@ d+=h
335	eor	r12,r12,r10			@ Maj(a,b,c)
336	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
337	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
338#if __ARM_ARCH__>=7
339	@ ldr	r2,[r1],#4			@ 4
340# if 4==15
341	str	r1,[sp,#17*4]			@ make room for r1
342# endif
343	eor	r0,r4,r4,ror#5
344	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
345	eor	r0,r0,r4,ror#19	@ Sigma1(e)
346	rev	r2,r2
347#else
348	@ ldrb	r2,[r1,#3]			@ 4
349	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
350	ldrb	r12,[r1,#2]
351	ldrb	r0,[r1,#1]
352	orr	r2,r2,r12,lsl#8
353	ldrb	r12,[r1],#4
354	orr	r2,r2,r0,lsl#16
355# if 4==15
356	str	r1,[sp,#17*4]			@ make room for r1
357# endif
358	eor	r0,r4,r4,ror#5
359	orr	r2,r2,r12,lsl#24
360	eor	r0,r0,r4,ror#19	@ Sigma1(e)
361#endif
362	ldr	r12,[r14],#4			@ *K256++
363	add	r7,r7,r2			@ h+=X[i]
364	str	r2,[sp,#4*4]
365	eor	r2,r5,r6
366	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
367	and	r2,r2,r4
368	add	r7,r7,r12			@ h+=K256[i]
369	eor	r2,r2,r6			@ Ch(e,f,g)
370	eor	r0,r8,r8,ror#11
371	add	r7,r7,r2			@ h+=Ch(e,f,g)
372#if 4==31
373	and	r12,r12,#0xff
374	cmp	r12,#0xf2			@ done?
375#endif
376#if 4<15
377# if __ARM_ARCH__>=7
378	ldr	r2,[r1],#4			@ prefetch
379# else
380	ldrb	r2,[r1,#3]
381# endif
382	eor	r12,r8,r9			@ a^b, b^c in next round
383#else
384	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
385	eor	r12,r8,r9			@ a^b, b^c in next round
386	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
387#endif
388	eor	r0,r0,r8,ror#20	@ Sigma0(a)
389	and	r3,r3,r12			@ (b^c)&=(a^b)
390	add	r11,r11,r7			@ d+=h
391	eor	r3,r3,r9			@ Maj(a,b,c)
392	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
393	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
394#if __ARM_ARCH__>=7
395	@ ldr	r2,[r1],#4			@ 5
396# if 5==15
397	str	r1,[sp,#17*4]			@ make room for r1
398# endif
399	eor	r0,r11,r11,ror#5
400	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
401	eor	r0,r0,r11,ror#19	@ Sigma1(e)
402	rev	r2,r2
403#else
404	@ ldrb	r2,[r1,#3]			@ 5
405	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
406	ldrb	r3,[r1,#2]
407	ldrb	r0,[r1,#1]
408	orr	r2,r2,r3,lsl#8
409	ldrb	r3,[r1],#4
410	orr	r2,r2,r0,lsl#16
411# if 5==15
412	str	r1,[sp,#17*4]			@ make room for r1
413# endif
414	eor	r0,r11,r11,ror#5
415	orr	r2,r2,r3,lsl#24
416	eor	r0,r0,r11,ror#19	@ Sigma1(e)
417#endif
418	ldr	r3,[r14],#4			@ *K256++
419	add	r6,r6,r2			@ h+=X[i]
420	str	r2,[sp,#5*4]
421	eor	r2,r4,r5
422	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
423	and	r2,r2,r11
424	add	r6,r6,r3			@ h+=K256[i]
425	eor	r2,r2,r5			@ Ch(e,f,g)
426	eor	r0,r7,r7,ror#11
427	add	r6,r6,r2			@ h+=Ch(e,f,g)
428#if 5==31
429	and	r3,r3,#0xff
430	cmp	r3,#0xf2			@ done?
431#endif
432#if 5<15
433# if __ARM_ARCH__>=7
434	ldr	r2,[r1],#4			@ prefetch
435# else
436	ldrb	r2,[r1,#3]
437# endif
438	eor	r3,r7,r8			@ a^b, b^c in next round
439#else
440	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
441	eor	r3,r7,r8			@ a^b, b^c in next round
442	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
443#endif
444	eor	r0,r0,r7,ror#20	@ Sigma0(a)
445	and	r12,r12,r3			@ (b^c)&=(a^b)
446	add	r10,r10,r6			@ d+=h
447	eor	r12,r12,r8			@ Maj(a,b,c)
448	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
449	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
450#if __ARM_ARCH__>=7
451	@ ldr	r2,[r1],#4			@ 6
452# if 6==15
453	str	r1,[sp,#17*4]			@ make room for r1
454# endif
455	eor	r0,r10,r10,ror#5
456	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
457	eor	r0,r0,r10,ror#19	@ Sigma1(e)
458	rev	r2,r2
459#else
460	@ ldrb	r2,[r1,#3]			@ 6
461	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
462	ldrb	r12,[r1,#2]
463	ldrb	r0,[r1,#1]
464	orr	r2,r2,r12,lsl#8
465	ldrb	r12,[r1],#4
466	orr	r2,r2,r0,lsl#16
467# if 6==15
468	str	r1,[sp,#17*4]			@ make room for r1
469# endif
470	eor	r0,r10,r10,ror#5
471	orr	r2,r2,r12,lsl#24
472	eor	r0,r0,r10,ror#19	@ Sigma1(e)
473#endif
474	ldr	r12,[r14],#4			@ *K256++
475	add	r5,r5,r2			@ h+=X[i]
476	str	r2,[sp,#6*4]
477	eor	r2,r11,r4
478	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
479	and	r2,r2,r10
480	add	r5,r5,r12			@ h+=K256[i]
481	eor	r2,r2,r4			@ Ch(e,f,g)
482	eor	r0,r6,r6,ror#11
483	add	r5,r5,r2			@ h+=Ch(e,f,g)
484#if 6==31
485	and	r12,r12,#0xff
486	cmp	r12,#0xf2			@ done?
487#endif
488#if 6<15
489# if __ARM_ARCH__>=7
490	ldr	r2,[r1],#4			@ prefetch
491# else
492	ldrb	r2,[r1,#3]
493# endif
494	eor	r12,r6,r7			@ a^b, b^c in next round
495#else
496	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
497	eor	r12,r6,r7			@ a^b, b^c in next round
498	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
499#endif
500	eor	r0,r0,r6,ror#20	@ Sigma0(a)
501	and	r3,r3,r12			@ (b^c)&=(a^b)
502	add	r9,r9,r5			@ d+=h
503	eor	r3,r3,r7			@ Maj(a,b,c)
504	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
505	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
506#if __ARM_ARCH__>=7
507	@ ldr	r2,[r1],#4			@ 7
508# if 7==15
509	str	r1,[sp,#17*4]			@ make room for r1
510# endif
511	eor	r0,r9,r9,ror#5
512	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
513	eor	r0,r0,r9,ror#19	@ Sigma1(e)
514	rev	r2,r2
515#else
516	@ ldrb	r2,[r1,#3]			@ 7
517	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
518	ldrb	r3,[r1,#2]
519	ldrb	r0,[r1,#1]
520	orr	r2,r2,r3,lsl#8
521	ldrb	r3,[r1],#4
522	orr	r2,r2,r0,lsl#16
523# if 7==15
524	str	r1,[sp,#17*4]			@ make room for r1
525# endif
526	eor	r0,r9,r9,ror#5
527	orr	r2,r2,r3,lsl#24
528	eor	r0,r0,r9,ror#19	@ Sigma1(e)
529#endif
530	ldr	r3,[r14],#4			@ *K256++
531	add	r4,r4,r2			@ h+=X[i]
532	str	r2,[sp,#7*4]
533	eor	r2,r10,r11
534	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
535	and	r2,r2,r9
536	add	r4,r4,r3			@ h+=K256[i]
537	eor	r2,r2,r11			@ Ch(e,f,g)
538	eor	r0,r5,r5,ror#11
539	add	r4,r4,r2			@ h+=Ch(e,f,g)
540#if 7==31
541	and	r3,r3,#0xff
542	cmp	r3,#0xf2			@ done?
543#endif
544#if 7<15
545# if __ARM_ARCH__>=7
546	ldr	r2,[r1],#4			@ prefetch
547# else
548	ldrb	r2,[r1,#3]
549# endif
550	eor	r3,r5,r6			@ a^b, b^c in next round
551#else
552	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
553	eor	r3,r5,r6			@ a^b, b^c in next round
554	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
555#endif
556	eor	r0,r0,r5,ror#20	@ Sigma0(a)
557	and	r12,r12,r3			@ (b^c)&=(a^b)
558	add	r8,r8,r4			@ d+=h
559	eor	r12,r12,r6			@ Maj(a,b,c)
560	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
561	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
562#if __ARM_ARCH__>=7
563	@ ldr	r2,[r1],#4			@ 8
564# if 8==15
565	str	r1,[sp,#17*4]			@ make room for r1
566# endif
567	eor	r0,r8,r8,ror#5
568	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
569	eor	r0,r0,r8,ror#19	@ Sigma1(e)
570	rev	r2,r2
571#else
572	@ ldrb	r2,[r1,#3]			@ 8
573	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
574	ldrb	r12,[r1,#2]
575	ldrb	r0,[r1,#1]
576	orr	r2,r2,r12,lsl#8
577	ldrb	r12,[r1],#4
578	orr	r2,r2,r0,lsl#16
579# if 8==15
580	str	r1,[sp,#17*4]			@ make room for r1
581# endif
582	eor	r0,r8,r8,ror#5
583	orr	r2,r2,r12,lsl#24
584	eor	r0,r0,r8,ror#19	@ Sigma1(e)
585#endif
586	ldr	r12,[r14],#4			@ *K256++
587	add	r11,r11,r2			@ h+=X[i]
588	str	r2,[sp,#8*4]
589	eor	r2,r9,r10
590	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
591	and	r2,r2,r8
592	add	r11,r11,r12			@ h+=K256[i]
593	eor	r2,r2,r10			@ Ch(e,f,g)
594	eor	r0,r4,r4,ror#11
595	add	r11,r11,r2			@ h+=Ch(e,f,g)
596#if 8==31
597	and	r12,r12,#0xff
598	cmp	r12,#0xf2			@ done?
599#endif
600#if 8<15
601# if __ARM_ARCH__>=7
602	ldr	r2,[r1],#4			@ prefetch
603# else
604	ldrb	r2,[r1,#3]
605# endif
606	eor	r12,r4,r5			@ a^b, b^c in next round
607#else
608	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
609	eor	r12,r4,r5			@ a^b, b^c in next round
610	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
611#endif
612	eor	r0,r0,r4,ror#20	@ Sigma0(a)
613	and	r3,r3,r12			@ (b^c)&=(a^b)
614	add	r7,r7,r11			@ d+=h
615	eor	r3,r3,r5			@ Maj(a,b,c)
616	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
617	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
618#if __ARM_ARCH__>=7
619	@ ldr	r2,[r1],#4			@ 9
620# if 9==15
621	str	r1,[sp,#17*4]			@ make room for r1
622# endif
623	eor	r0,r7,r7,ror#5
624	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
625	eor	r0,r0,r7,ror#19	@ Sigma1(e)
626	rev	r2,r2
627#else
628	@ ldrb	r2,[r1,#3]			@ 9
629	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
630	ldrb	r3,[r1,#2]
631	ldrb	r0,[r1,#1]
632	orr	r2,r2,r3,lsl#8
633	ldrb	r3,[r1],#4
634	orr	r2,r2,r0,lsl#16
635# if 9==15
636	str	r1,[sp,#17*4]			@ make room for r1
637# endif
638	eor	r0,r7,r7,ror#5
639	orr	r2,r2,r3,lsl#24
640	eor	r0,r0,r7,ror#19	@ Sigma1(e)
641#endif
642	ldr	r3,[r14],#4			@ *K256++
643	add	r10,r10,r2			@ h+=X[i]
644	str	r2,[sp,#9*4]
645	eor	r2,r8,r9
646	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
647	and	r2,r2,r7
648	add	r10,r10,r3			@ h+=K256[i]
649	eor	r2,r2,r9			@ Ch(e,f,g)
650	eor	r0,r11,r11,ror#11
651	add	r10,r10,r2			@ h+=Ch(e,f,g)
652#if 9==31
653	and	r3,r3,#0xff
654	cmp	r3,#0xf2			@ done?
655#endif
656#if 9<15
657# if __ARM_ARCH__>=7
658	ldr	r2,[r1],#4			@ prefetch
659# else
660	ldrb	r2,[r1,#3]
661# endif
662	eor	r3,r11,r4			@ a^b, b^c in next round
663#else
664	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
665	eor	r3,r11,r4			@ a^b, b^c in next round
666	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
667#endif
668	eor	r0,r0,r11,ror#20	@ Sigma0(a)
669	and	r12,r12,r3			@ (b^c)&=(a^b)
670	add	r6,r6,r10			@ d+=h
671	eor	r12,r12,r4			@ Maj(a,b,c)
672	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
673	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
674#if __ARM_ARCH__>=7
675	@ ldr	r2,[r1],#4			@ 10
676# if 10==15
677	str	r1,[sp,#17*4]			@ make room for r1
678# endif
679	eor	r0,r6,r6,ror#5
680	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
681	eor	r0,r0,r6,ror#19	@ Sigma1(e)
682	rev	r2,r2
683#else
684	@ ldrb	r2,[r1,#3]			@ 10
685	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
686	ldrb	r12,[r1,#2]
687	ldrb	r0,[r1,#1]
688	orr	r2,r2,r12,lsl#8
689	ldrb	r12,[r1],#4
690	orr	r2,r2,r0,lsl#16
691# if 10==15
692	str	r1,[sp,#17*4]			@ make room for r1
693# endif
694	eor	r0,r6,r6,ror#5
695	orr	r2,r2,r12,lsl#24
696	eor	r0,r0,r6,ror#19	@ Sigma1(e)
697#endif
698	ldr	r12,[r14],#4			@ *K256++
699	add	r9,r9,r2			@ h+=X[i]
700	str	r2,[sp,#10*4]
701	eor	r2,r7,r8
702	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
703	and	r2,r2,r6
704	add	r9,r9,r12			@ h+=K256[i]
705	eor	r2,r2,r8			@ Ch(e,f,g)
706	eor	r0,r10,r10,ror#11
707	add	r9,r9,r2			@ h+=Ch(e,f,g)
708#if 10==31
709	and	r12,r12,#0xff
710	cmp	r12,#0xf2			@ done?
711#endif
712#if 10<15
713# if __ARM_ARCH__>=7
714	ldr	r2,[r1],#4			@ prefetch
715# else
716	ldrb	r2,[r1,#3]
717# endif
718	eor	r12,r10,r11			@ a^b, b^c in next round
719#else
720	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
721	eor	r12,r10,r11			@ a^b, b^c in next round
722	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
723#endif
724	eor	r0,r0,r10,ror#20	@ Sigma0(a)
725	and	r3,r3,r12			@ (b^c)&=(a^b)
726	add	r5,r5,r9			@ d+=h
727	eor	r3,r3,r11			@ Maj(a,b,c)
728	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
729	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
730#if __ARM_ARCH__>=7
731	@ ldr	r2,[r1],#4			@ 11
732# if 11==15
733	str	r1,[sp,#17*4]			@ make room for r1
734# endif
735	eor	r0,r5,r5,ror#5
736	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
737	eor	r0,r0,r5,ror#19	@ Sigma1(e)
738	rev	r2,r2
739#else
740	@ ldrb	r2,[r1,#3]			@ 11
741	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
742	ldrb	r3,[r1,#2]
743	ldrb	r0,[r1,#1]
744	orr	r2,r2,r3,lsl#8
745	ldrb	r3,[r1],#4
746	orr	r2,r2,r0,lsl#16
747# if 11==15
748	str	r1,[sp,#17*4]			@ make room for r1
749# endif
750	eor	r0,r5,r5,ror#5
751	orr	r2,r2,r3,lsl#24
752	eor	r0,r0,r5,ror#19	@ Sigma1(e)
753#endif
754	ldr	r3,[r14],#4			@ *K256++
755	add	r8,r8,r2			@ h+=X[i]
756	str	r2,[sp,#11*4]
757	eor	r2,r6,r7
758	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
759	and	r2,r2,r5
760	add	r8,r8,r3			@ h+=K256[i]
761	eor	r2,r2,r7			@ Ch(e,f,g)
762	eor	r0,r9,r9,ror#11
763	add	r8,r8,r2			@ h+=Ch(e,f,g)
764#if 11==31
765	and	r3,r3,#0xff
766	cmp	r3,#0xf2			@ done?
767#endif
768#if 11<15
769# if __ARM_ARCH__>=7
770	ldr	r2,[r1],#4			@ prefetch
771# else
772	ldrb	r2,[r1,#3]
773# endif
774	eor	r3,r9,r10			@ a^b, b^c in next round
775#else
776	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
777	eor	r3,r9,r10			@ a^b, b^c in next round
778	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
779#endif
780	eor	r0,r0,r9,ror#20	@ Sigma0(a)
781	and	r12,r12,r3			@ (b^c)&=(a^b)
782	add	r4,r4,r8			@ d+=h
783	eor	r12,r12,r10			@ Maj(a,b,c)
784	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
785	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
786#if __ARM_ARCH__>=7
787	@ ldr	r2,[r1],#4			@ 12
788# if 12==15
789	str	r1,[sp,#17*4]			@ make room for r1
790# endif
791	eor	r0,r4,r4,ror#5
792	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
793	eor	r0,r0,r4,ror#19	@ Sigma1(e)
794	rev	r2,r2
795#else
796	@ ldrb	r2,[r1,#3]			@ 12
797	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
798	ldrb	r12,[r1,#2]
799	ldrb	r0,[r1,#1]
800	orr	r2,r2,r12,lsl#8
801	ldrb	r12,[r1],#4
802	orr	r2,r2,r0,lsl#16
803# if 12==15
804	str	r1,[sp,#17*4]			@ make room for r1
805# endif
806	eor	r0,r4,r4,ror#5
807	orr	r2,r2,r12,lsl#24
808	eor	r0,r0,r4,ror#19	@ Sigma1(e)
809#endif
810	ldr	r12,[r14],#4			@ *K256++
811	add	r7,r7,r2			@ h+=X[i]
812	str	r2,[sp,#12*4]
813	eor	r2,r5,r6
814	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
815	and	r2,r2,r4
816	add	r7,r7,r12			@ h+=K256[i]
817	eor	r2,r2,r6			@ Ch(e,f,g)
818	eor	r0,r8,r8,ror#11
819	add	r7,r7,r2			@ h+=Ch(e,f,g)
820#if 12==31
821	and	r12,r12,#0xff
822	cmp	r12,#0xf2			@ done?
823#endif
824#if 12<15
825# if __ARM_ARCH__>=7
826	ldr	r2,[r1],#4			@ prefetch
827# else
828	ldrb	r2,[r1,#3]
829# endif
830	eor	r12,r8,r9			@ a^b, b^c in next round
831#else
832	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
833	eor	r12,r8,r9			@ a^b, b^c in next round
834	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
835#endif
836	eor	r0,r0,r8,ror#20	@ Sigma0(a)
837	and	r3,r3,r12			@ (b^c)&=(a^b)
838	add	r11,r11,r7			@ d+=h
839	eor	r3,r3,r9			@ Maj(a,b,c)
840	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
841	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
842#if __ARM_ARCH__>=7
843	@ ldr	r2,[r1],#4			@ 13
844# if 13==15
845	str	r1,[sp,#17*4]			@ make room for r1
846# endif
847	eor	r0,r11,r11,ror#5
848	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
849	eor	r0,r0,r11,ror#19	@ Sigma1(e)
850	rev	r2,r2
851#else
852	@ ldrb	r2,[r1,#3]			@ 13
853	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
854	ldrb	r3,[r1,#2]
855	ldrb	r0,[r1,#1]
856	orr	r2,r2,r3,lsl#8
857	ldrb	r3,[r1],#4
858	orr	r2,r2,r0,lsl#16
859# if 13==15
860	str	r1,[sp,#17*4]			@ make room for r1
861# endif
862	eor	r0,r11,r11,ror#5
863	orr	r2,r2,r3,lsl#24
864	eor	r0,r0,r11,ror#19	@ Sigma1(e)
865#endif
866	ldr	r3,[r14],#4			@ *K256++
867	add	r6,r6,r2			@ h+=X[i]
868	str	r2,[sp,#13*4]
869	eor	r2,r4,r5
870	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
871	and	r2,r2,r11
872	add	r6,r6,r3			@ h+=K256[i]
873	eor	r2,r2,r5			@ Ch(e,f,g)
874	eor	r0,r7,r7,ror#11
875	add	r6,r6,r2			@ h+=Ch(e,f,g)
876#if 13==31
877	and	r3,r3,#0xff
878	cmp	r3,#0xf2			@ done?
879#endif
880#if 13<15
881# if __ARM_ARCH__>=7
882	ldr	r2,[r1],#4			@ prefetch
883# else
884	ldrb	r2,[r1,#3]
885# endif
886	eor	r3,r7,r8			@ a^b, b^c in next round
887#else
888	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
889	eor	r3,r7,r8			@ a^b, b^c in next round
890	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
891#endif
892	eor	r0,r0,r7,ror#20	@ Sigma0(a)
893	and	r12,r12,r3			@ (b^c)&=(a^b)
894	add	r10,r10,r6			@ d+=h
895	eor	r12,r12,r8			@ Maj(a,b,c)
896	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
897	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
898#if __ARM_ARCH__>=7
899	@ ldr	r2,[r1],#4			@ 14
900# if 14==15
901	str	r1,[sp,#17*4]			@ make room for r1
902# endif
903	eor	r0,r10,r10,ror#5
904	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
905	eor	r0,r0,r10,ror#19	@ Sigma1(e)
906	rev	r2,r2
907#else
908	@ ldrb	r2,[r1,#3]			@ 14
909	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
910	ldrb	r12,[r1,#2]
911	ldrb	r0,[r1,#1]
912	orr	r2,r2,r12,lsl#8
913	ldrb	r12,[r1],#4
914	orr	r2,r2,r0,lsl#16
915# if 14==15
916	str	r1,[sp,#17*4]			@ make room for r1
917# endif
918	eor	r0,r10,r10,ror#5
919	orr	r2,r2,r12,lsl#24
920	eor	r0,r0,r10,ror#19	@ Sigma1(e)
921#endif
922	ldr	r12,[r14],#4			@ *K256++
923	add	r5,r5,r2			@ h+=X[i]
924	str	r2,[sp,#14*4]
925	eor	r2,r11,r4
926	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
927	and	r2,r2,r10
928	add	r5,r5,r12			@ h+=K256[i]
929	eor	r2,r2,r4			@ Ch(e,f,g)
930	eor	r0,r6,r6,ror#11
931	add	r5,r5,r2			@ h+=Ch(e,f,g)
932#if 14==31
933	and	r12,r12,#0xff
934	cmp	r12,#0xf2			@ done?
935#endif
936#if 14<15
937# if __ARM_ARCH__>=7
938	ldr	r2,[r1],#4			@ prefetch
939# else
940	ldrb	r2,[r1,#3]
941# endif
942	eor	r12,r6,r7			@ a^b, b^c in next round
943#else
944	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
945	eor	r12,r6,r7			@ a^b, b^c in next round
946	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
947#endif
948	eor	r0,r0,r6,ror#20	@ Sigma0(a)
949	and	r3,r3,r12			@ (b^c)&=(a^b)
950	add	r9,r9,r5			@ d+=h
951	eor	r3,r3,r7			@ Maj(a,b,c)
952	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
953	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
954#if __ARM_ARCH__>=7
955	@ ldr	r2,[r1],#4			@ 15
956# if 15==15
957	str	r1,[sp,#17*4]			@ make room for r1
958# endif
959	eor	r0,r9,r9,ror#5
960	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
961	eor	r0,r0,r9,ror#19	@ Sigma1(e)
962	rev	r2,r2
963#else
964	@ ldrb	r2,[r1,#3]			@ 15
965	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
966	ldrb	r3,[r1,#2]
967	ldrb	r0,[r1,#1]
968	orr	r2,r2,r3,lsl#8
969	ldrb	r3,[r1],#4
970	orr	r2,r2,r0,lsl#16
971# if 15==15
972	str	r1,[sp,#17*4]			@ make room for r1
973# endif
974	eor	r0,r9,r9,ror#5
975	orr	r2,r2,r3,lsl#24
976	eor	r0,r0,r9,ror#19	@ Sigma1(e)
977#endif
978	ldr	r3,[r14],#4			@ *K256++
979	add	r4,r4,r2			@ h+=X[i]
980	str	r2,[sp,#15*4]
981	eor	r2,r10,r11
982	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
983	and	r2,r2,r9
984	add	r4,r4,r3			@ h+=K256[i]
985	eor	r2,r2,r11			@ Ch(e,f,g)
986	eor	r0,r5,r5,ror#11
987	add	r4,r4,r2			@ h+=Ch(e,f,g)
988#if 15==31
989	and	r3,r3,#0xff
990	cmp	r3,#0xf2			@ done?
991#endif
992#if 15<15
993# if __ARM_ARCH__>=7
994	ldr	r2,[r1],#4			@ prefetch
995# else
996	ldrb	r2,[r1,#3]
997# endif
998	eor	r3,r5,r6			@ a^b, b^c in next round
999#else
1000	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1001	eor	r3,r5,r6			@ a^b, b^c in next round
1002	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1003#endif
1004	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1005	and	r12,r12,r3			@ (b^c)&=(a^b)
1006	add	r8,r8,r4			@ d+=h
1007	eor	r12,r12,r6			@ Maj(a,b,c)
1008	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1009	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1010.Lrounds_16_xx:
1011	@ ldr	r2,[sp,#1*4]		@ 16
1012	@ ldr	r1,[sp,#14*4]
1013	mov	r0,r2,ror#7
1014	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1015	mov	r12,r1,ror#17
1016	eor	r0,r0,r2,ror#18
1017	eor	r12,r12,r1,ror#19
1018	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1019	ldr	r2,[sp,#0*4]
1020	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1021	ldr	r1,[sp,#9*4]
1022
1023	add	r12,r12,r0
1024	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1025	add	r2,r2,r12
1026	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1027	add	r2,r2,r1			@ X[i]
1028	ldr	r12,[r14],#4			@ *K256++
1029	add	r11,r11,r2			@ h+=X[i]
1030	str	r2,[sp,#0*4]
1031	eor	r2,r9,r10
1032	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1033	and	r2,r2,r8
1034	add	r11,r11,r12			@ h+=K256[i]
1035	eor	r2,r2,r10			@ Ch(e,f,g)
1036	eor	r0,r4,r4,ror#11
1037	add	r11,r11,r2			@ h+=Ch(e,f,g)
1038#if 16==31
1039	and	r12,r12,#0xff
1040	cmp	r12,#0xf2			@ done?
1041#endif
1042#if 16<15
1043# if __ARM_ARCH__>=7
1044	ldr	r2,[r1],#4			@ prefetch
1045# else
1046	ldrb	r2,[r1,#3]
1047# endif
1048	eor	r12,r4,r5			@ a^b, b^c in next round
1049#else
1050	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1051	eor	r12,r4,r5			@ a^b, b^c in next round
1052	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1053#endif
1054	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1055	and	r3,r3,r12			@ (b^c)&=(a^b)
1056	add	r7,r7,r11			@ d+=h
1057	eor	r3,r3,r5			@ Maj(a,b,c)
1058	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1059	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1060	@ ldr	r2,[sp,#2*4]		@ 17
1061	@ ldr	r1,[sp,#15*4]
1062	mov	r0,r2,ror#7
1063	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1064	mov	r3,r1,ror#17
1065	eor	r0,r0,r2,ror#18
1066	eor	r3,r3,r1,ror#19
1067	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1068	ldr	r2,[sp,#1*4]
1069	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1070	ldr	r1,[sp,#10*4]
1071
1072	add	r3,r3,r0
1073	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1074	add	r2,r2,r3
1075	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1076	add	r2,r2,r1			@ X[i]
1077	ldr	r3,[r14],#4			@ *K256++
1078	add	r10,r10,r2			@ h+=X[i]
1079	str	r2,[sp,#1*4]
1080	eor	r2,r8,r9
1081	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1082	and	r2,r2,r7
1083	add	r10,r10,r3			@ h+=K256[i]
1084	eor	r2,r2,r9			@ Ch(e,f,g)
1085	eor	r0,r11,r11,ror#11
1086	add	r10,r10,r2			@ h+=Ch(e,f,g)
1087#if 17==31
1088	and	r3,r3,#0xff
1089	cmp	r3,#0xf2			@ done?
1090#endif
1091#if 17<15
1092# if __ARM_ARCH__>=7
1093	ldr	r2,[r1],#4			@ prefetch
1094# else
1095	ldrb	r2,[r1,#3]
1096# endif
1097	eor	r3,r11,r4			@ a^b, b^c in next round
1098#else
1099	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1100	eor	r3,r11,r4			@ a^b, b^c in next round
1101	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1102#endif
1103	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1104	and	r12,r12,r3			@ (b^c)&=(a^b)
1105	add	r6,r6,r10			@ d+=h
1106	eor	r12,r12,r4			@ Maj(a,b,c)
1107	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1108	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1109	@ ldr	r2,[sp,#3*4]		@ 18
1110	@ ldr	r1,[sp,#0*4]
1111	mov	r0,r2,ror#7
1112	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1113	mov	r12,r1,ror#17
1114	eor	r0,r0,r2,ror#18
1115	eor	r12,r12,r1,ror#19
1116	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1117	ldr	r2,[sp,#2*4]
1118	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1119	ldr	r1,[sp,#11*4]
1120
1121	add	r12,r12,r0
1122	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1123	add	r2,r2,r12
1124	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1125	add	r2,r2,r1			@ X[i]
1126	ldr	r12,[r14],#4			@ *K256++
1127	add	r9,r9,r2			@ h+=X[i]
1128	str	r2,[sp,#2*4]
1129	eor	r2,r7,r8
1130	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1131	and	r2,r2,r6
1132	add	r9,r9,r12			@ h+=K256[i]
1133	eor	r2,r2,r8			@ Ch(e,f,g)
1134	eor	r0,r10,r10,ror#11
1135	add	r9,r9,r2			@ h+=Ch(e,f,g)
1136#if 18==31
1137	and	r12,r12,#0xff
1138	cmp	r12,#0xf2			@ done?
1139#endif
1140#if 18<15
1141# if __ARM_ARCH__>=7
1142	ldr	r2,[r1],#4			@ prefetch
1143# else
1144	ldrb	r2,[r1,#3]
1145# endif
1146	eor	r12,r10,r11			@ a^b, b^c in next round
1147#else
1148	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1149	eor	r12,r10,r11			@ a^b, b^c in next round
1150	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1151#endif
1152	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1153	and	r3,r3,r12			@ (b^c)&=(a^b)
1154	add	r5,r5,r9			@ d+=h
1155	eor	r3,r3,r11			@ Maj(a,b,c)
1156	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1157	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1158	@ ldr	r2,[sp,#4*4]		@ 19
1159	@ ldr	r1,[sp,#1*4]
1160	mov	r0,r2,ror#7
1161	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1162	mov	r3,r1,ror#17
1163	eor	r0,r0,r2,ror#18
1164	eor	r3,r3,r1,ror#19
1165	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1166	ldr	r2,[sp,#3*4]
1167	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1168	ldr	r1,[sp,#12*4]
1169
1170	add	r3,r3,r0
1171	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1172	add	r2,r2,r3
1173	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1174	add	r2,r2,r1			@ X[i]
1175	ldr	r3,[r14],#4			@ *K256++
1176	add	r8,r8,r2			@ h+=X[i]
1177	str	r2,[sp,#3*4]
1178	eor	r2,r6,r7
1179	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1180	and	r2,r2,r5
1181	add	r8,r8,r3			@ h+=K256[i]
1182	eor	r2,r2,r7			@ Ch(e,f,g)
1183	eor	r0,r9,r9,ror#11
1184	add	r8,r8,r2			@ h+=Ch(e,f,g)
1185#if 19==31
1186	and	r3,r3,#0xff
1187	cmp	r3,#0xf2			@ done?
1188#endif
1189#if 19<15
1190# if __ARM_ARCH__>=7
1191	ldr	r2,[r1],#4			@ prefetch
1192# else
1193	ldrb	r2,[r1,#3]
1194# endif
1195	eor	r3,r9,r10			@ a^b, b^c in next round
1196#else
1197	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1198	eor	r3,r9,r10			@ a^b, b^c in next round
1199	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1200#endif
1201	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1202	and	r12,r12,r3			@ (b^c)&=(a^b)
1203	add	r4,r4,r8			@ d+=h
1204	eor	r12,r12,r10			@ Maj(a,b,c)
1205	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1206	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1207	@ ldr	r2,[sp,#5*4]		@ 20
1208	@ ldr	r1,[sp,#2*4]
1209	mov	r0,r2,ror#7
1210	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1211	mov	r12,r1,ror#17
1212	eor	r0,r0,r2,ror#18
1213	eor	r12,r12,r1,ror#19
1214	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1215	ldr	r2,[sp,#4*4]
1216	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1217	ldr	r1,[sp,#13*4]
1218
1219	add	r12,r12,r0
1220	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1221	add	r2,r2,r12
1222	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1223	add	r2,r2,r1			@ X[i]
1224	ldr	r12,[r14],#4			@ *K256++
1225	add	r7,r7,r2			@ h+=X[i]
1226	str	r2,[sp,#4*4]
1227	eor	r2,r5,r6
1228	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1229	and	r2,r2,r4
1230	add	r7,r7,r12			@ h+=K256[i]
1231	eor	r2,r2,r6			@ Ch(e,f,g)
1232	eor	r0,r8,r8,ror#11
1233	add	r7,r7,r2			@ h+=Ch(e,f,g)
1234#if 20==31
1235	and	r12,r12,#0xff
1236	cmp	r12,#0xf2			@ done?
1237#endif
1238#if 20<15
1239# if __ARM_ARCH__>=7
1240	ldr	r2,[r1],#4			@ prefetch
1241# else
1242	ldrb	r2,[r1,#3]
1243# endif
1244	eor	r12,r8,r9			@ a^b, b^c in next round
1245#else
1246	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1247	eor	r12,r8,r9			@ a^b, b^c in next round
1248	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1249#endif
1250	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1251	and	r3,r3,r12			@ (b^c)&=(a^b)
1252	add	r11,r11,r7			@ d+=h
1253	eor	r3,r3,r9			@ Maj(a,b,c)
1254	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1255	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1256	@ ldr	r2,[sp,#6*4]		@ 21
1257	@ ldr	r1,[sp,#3*4]
1258	mov	r0,r2,ror#7
1259	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1260	mov	r3,r1,ror#17
1261	eor	r0,r0,r2,ror#18
1262	eor	r3,r3,r1,ror#19
1263	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1264	ldr	r2,[sp,#5*4]
1265	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1266	ldr	r1,[sp,#14*4]
1267
1268	add	r3,r3,r0
1269	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1270	add	r2,r2,r3
1271	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1272	add	r2,r2,r1			@ X[i]
1273	ldr	r3,[r14],#4			@ *K256++
1274	add	r6,r6,r2			@ h+=X[i]
1275	str	r2,[sp,#5*4]
1276	eor	r2,r4,r5
1277	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1278	and	r2,r2,r11
1279	add	r6,r6,r3			@ h+=K256[i]
1280	eor	r2,r2,r5			@ Ch(e,f,g)
1281	eor	r0,r7,r7,ror#11
1282	add	r6,r6,r2			@ h+=Ch(e,f,g)
1283#if 21==31
1284	and	r3,r3,#0xff
1285	cmp	r3,#0xf2			@ done?
1286#endif
1287#if 21<15
1288# if __ARM_ARCH__>=7
1289	ldr	r2,[r1],#4			@ prefetch
1290# else
1291	ldrb	r2,[r1,#3]
1292# endif
1293	eor	r3,r7,r8			@ a^b, b^c in next round
1294#else
1295	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1296	eor	r3,r7,r8			@ a^b, b^c in next round
1297	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1298#endif
1299	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1300	and	r12,r12,r3			@ (b^c)&=(a^b)
1301	add	r10,r10,r6			@ d+=h
1302	eor	r12,r12,r8			@ Maj(a,b,c)
1303	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1304	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1305	@ ldr	r2,[sp,#7*4]		@ 22
1306	@ ldr	r1,[sp,#4*4]
1307	mov	r0,r2,ror#7
1308	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1309	mov	r12,r1,ror#17
1310	eor	r0,r0,r2,ror#18
1311	eor	r12,r12,r1,ror#19
1312	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1313	ldr	r2,[sp,#6*4]
1314	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1315	ldr	r1,[sp,#15*4]
1316
1317	add	r12,r12,r0
1318	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1319	add	r2,r2,r12
1320	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1321	add	r2,r2,r1			@ X[i]
1322	ldr	r12,[r14],#4			@ *K256++
1323	add	r5,r5,r2			@ h+=X[i]
1324	str	r2,[sp,#6*4]
1325	eor	r2,r11,r4
1326	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1327	and	r2,r2,r10
1328	add	r5,r5,r12			@ h+=K256[i]
1329	eor	r2,r2,r4			@ Ch(e,f,g)
1330	eor	r0,r6,r6,ror#11
1331	add	r5,r5,r2			@ h+=Ch(e,f,g)
1332#if 22==31
1333	and	r12,r12,#0xff
1334	cmp	r12,#0xf2			@ done?
1335#endif
1336#if 22<15
1337# if __ARM_ARCH__>=7
1338	ldr	r2,[r1],#4			@ prefetch
1339# else
1340	ldrb	r2,[r1,#3]
1341# endif
1342	eor	r12,r6,r7			@ a^b, b^c in next round
1343#else
1344	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1345	eor	r12,r6,r7			@ a^b, b^c in next round
1346	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1347#endif
1348	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1349	and	r3,r3,r12			@ (b^c)&=(a^b)
1350	add	r9,r9,r5			@ d+=h
1351	eor	r3,r3,r7			@ Maj(a,b,c)
1352	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1353	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1354	@ ldr	r2,[sp,#8*4]		@ 23
1355	@ ldr	r1,[sp,#5*4]
1356	mov	r0,r2,ror#7
1357	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1358	mov	r3,r1,ror#17
1359	eor	r0,r0,r2,ror#18
1360	eor	r3,r3,r1,ror#19
1361	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1362	ldr	r2,[sp,#7*4]
1363	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1364	ldr	r1,[sp,#0*4]
1365
1366	add	r3,r3,r0
1367	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1368	add	r2,r2,r3
1369	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1370	add	r2,r2,r1			@ X[i]
1371	ldr	r3,[r14],#4			@ *K256++
1372	add	r4,r4,r2			@ h+=X[i]
1373	str	r2,[sp,#7*4]
1374	eor	r2,r10,r11
1375	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1376	and	r2,r2,r9
1377	add	r4,r4,r3			@ h+=K256[i]
1378	eor	r2,r2,r11			@ Ch(e,f,g)
1379	eor	r0,r5,r5,ror#11
1380	add	r4,r4,r2			@ h+=Ch(e,f,g)
1381#if 23==31
1382	and	r3,r3,#0xff
1383	cmp	r3,#0xf2			@ done?
1384#endif
1385#if 23<15
1386# if __ARM_ARCH__>=7
1387	ldr	r2,[r1],#4			@ prefetch
1388# else
1389	ldrb	r2,[r1,#3]
1390# endif
1391	eor	r3,r5,r6			@ a^b, b^c in next round
1392#else
1393	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1394	eor	r3,r5,r6			@ a^b, b^c in next round
1395	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1396#endif
1397	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1398	and	r12,r12,r3			@ (b^c)&=(a^b)
1399	add	r8,r8,r4			@ d+=h
1400	eor	r12,r12,r6			@ Maj(a,b,c)
1401	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1402	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1403	@ ldr	r2,[sp,#9*4]		@ 24
1404	@ ldr	r1,[sp,#6*4]
1405	mov	r0,r2,ror#7
1406	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1407	mov	r12,r1,ror#17
1408	eor	r0,r0,r2,ror#18
1409	eor	r12,r12,r1,ror#19
1410	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1411	ldr	r2,[sp,#8*4]
1412	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1413	ldr	r1,[sp,#1*4]
1414
1415	add	r12,r12,r0
1416	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1417	add	r2,r2,r12
1418	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1419	add	r2,r2,r1			@ X[i]
1420	ldr	r12,[r14],#4			@ *K256++
1421	add	r11,r11,r2			@ h+=X[i]
1422	str	r2,[sp,#8*4]
1423	eor	r2,r9,r10
1424	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1425	and	r2,r2,r8
1426	add	r11,r11,r12			@ h+=K256[i]
1427	eor	r2,r2,r10			@ Ch(e,f,g)
1428	eor	r0,r4,r4,ror#11
1429	add	r11,r11,r2			@ h+=Ch(e,f,g)
1430#if 24==31
1431	and	r12,r12,#0xff
1432	cmp	r12,#0xf2			@ done?
1433#endif
1434#if 24<15
1435# if __ARM_ARCH__>=7
1436	ldr	r2,[r1],#4			@ prefetch
1437# else
1438	ldrb	r2,[r1,#3]
1439# endif
1440	eor	r12,r4,r5			@ a^b, b^c in next round
1441#else
1442	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1443	eor	r12,r4,r5			@ a^b, b^c in next round
1444	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1445#endif
1446	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1447	and	r3,r3,r12			@ (b^c)&=(a^b)
1448	add	r7,r7,r11			@ d+=h
1449	eor	r3,r3,r5			@ Maj(a,b,c)
1450	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1451	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1452	@ ldr	r2,[sp,#10*4]		@ 25
1453	@ ldr	r1,[sp,#7*4]
1454	mov	r0,r2,ror#7
1455	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1456	mov	r3,r1,ror#17
1457	eor	r0,r0,r2,ror#18
1458	eor	r3,r3,r1,ror#19
1459	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1460	ldr	r2,[sp,#9*4]
1461	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1462	ldr	r1,[sp,#2*4]
1463
1464	add	r3,r3,r0
1465	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1466	add	r2,r2,r3
1467	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1468	add	r2,r2,r1			@ X[i]
1469	ldr	r3,[r14],#4			@ *K256++
1470	add	r10,r10,r2			@ h+=X[i]
1471	str	r2,[sp,#9*4]
1472	eor	r2,r8,r9
1473	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1474	and	r2,r2,r7
1475	add	r10,r10,r3			@ h+=K256[i]
1476	eor	r2,r2,r9			@ Ch(e,f,g)
1477	eor	r0,r11,r11,ror#11
1478	add	r10,r10,r2			@ h+=Ch(e,f,g)
1479#if 25==31
1480	and	r3,r3,#0xff
1481	cmp	r3,#0xf2			@ done?
1482#endif
1483#if 25<15
1484# if __ARM_ARCH__>=7
1485	ldr	r2,[r1],#4			@ prefetch
1486# else
1487	ldrb	r2,[r1,#3]
1488# endif
1489	eor	r3,r11,r4			@ a^b, b^c in next round
1490#else
1491	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1492	eor	r3,r11,r4			@ a^b, b^c in next round
1493	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1494#endif
1495	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1496	and	r12,r12,r3			@ (b^c)&=(a^b)
1497	add	r6,r6,r10			@ d+=h
1498	eor	r12,r12,r4			@ Maj(a,b,c)
1499	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1500	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1501	@ ldr	r2,[sp,#11*4]		@ 26
1502	@ ldr	r1,[sp,#8*4]
1503	mov	r0,r2,ror#7
1504	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1505	mov	r12,r1,ror#17
1506	eor	r0,r0,r2,ror#18
1507	eor	r12,r12,r1,ror#19
1508	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1509	ldr	r2,[sp,#10*4]
1510	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1511	ldr	r1,[sp,#3*4]
1512
1513	add	r12,r12,r0
1514	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1515	add	r2,r2,r12
1516	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1517	add	r2,r2,r1			@ X[i]
1518	ldr	r12,[r14],#4			@ *K256++
1519	add	r9,r9,r2			@ h+=X[i]
1520	str	r2,[sp,#10*4]
1521	eor	r2,r7,r8
1522	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1523	and	r2,r2,r6
1524	add	r9,r9,r12			@ h+=K256[i]
1525	eor	r2,r2,r8			@ Ch(e,f,g)
1526	eor	r0,r10,r10,ror#11
1527	add	r9,r9,r2			@ h+=Ch(e,f,g)
1528#if 26==31
1529	and	r12,r12,#0xff
1530	cmp	r12,#0xf2			@ done?
1531#endif
1532#if 26<15
1533# if __ARM_ARCH__>=7
1534	ldr	r2,[r1],#4			@ prefetch
1535# else
1536	ldrb	r2,[r1,#3]
1537# endif
1538	eor	r12,r10,r11			@ a^b, b^c in next round
1539#else
1540	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1541	eor	r12,r10,r11			@ a^b, b^c in next round
1542	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1543#endif
1544	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1545	and	r3,r3,r12			@ (b^c)&=(a^b)
1546	add	r5,r5,r9			@ d+=h
1547	eor	r3,r3,r11			@ Maj(a,b,c)
1548	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1549	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1550	@ ldr	r2,[sp,#12*4]		@ 27
1551	@ ldr	r1,[sp,#9*4]
1552	mov	r0,r2,ror#7
1553	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1554	mov	r3,r1,ror#17
1555	eor	r0,r0,r2,ror#18
1556	eor	r3,r3,r1,ror#19
1557	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1558	ldr	r2,[sp,#11*4]
1559	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1560	ldr	r1,[sp,#4*4]
1561
1562	add	r3,r3,r0
1563	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1564	add	r2,r2,r3
1565	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1566	add	r2,r2,r1			@ X[i]
1567	ldr	r3,[r14],#4			@ *K256++
1568	add	r8,r8,r2			@ h+=X[i]
1569	str	r2,[sp,#11*4]
1570	eor	r2,r6,r7
1571	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1572	and	r2,r2,r5
1573	add	r8,r8,r3			@ h+=K256[i]
1574	eor	r2,r2,r7			@ Ch(e,f,g)
1575	eor	r0,r9,r9,ror#11
1576	add	r8,r8,r2			@ h+=Ch(e,f,g)
1577#if 27==31
1578	and	r3,r3,#0xff
1579	cmp	r3,#0xf2			@ done?
1580#endif
1581#if 27<15
1582# if __ARM_ARCH__>=7
1583	ldr	r2,[r1],#4			@ prefetch
1584# else
1585	ldrb	r2,[r1,#3]
1586# endif
1587	eor	r3,r9,r10			@ a^b, b^c in next round
1588#else
1589	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1590	eor	r3,r9,r10			@ a^b, b^c in next round
1591	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1592#endif
1593	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1594	and	r12,r12,r3			@ (b^c)&=(a^b)
1595	add	r4,r4,r8			@ d+=h
1596	eor	r12,r12,r10			@ Maj(a,b,c)
1597	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1598	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1599	@ ldr	r2,[sp,#13*4]		@ 28
1600	@ ldr	r1,[sp,#10*4]
1601	mov	r0,r2,ror#7
1602	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1603	mov	r12,r1,ror#17
1604	eor	r0,r0,r2,ror#18
1605	eor	r12,r12,r1,ror#19
1606	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1607	ldr	r2,[sp,#12*4]
1608	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1609	ldr	r1,[sp,#5*4]
1610
1611	add	r12,r12,r0
1612	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1613	add	r2,r2,r12
1614	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1615	add	r2,r2,r1			@ X[i]
1616	ldr	r12,[r14],#4			@ *K256++
1617	add	r7,r7,r2			@ h+=X[i]
1618	str	r2,[sp,#12*4]
1619	eor	r2,r5,r6
1620	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1621	and	r2,r2,r4
1622	add	r7,r7,r12			@ h+=K256[i]
1623	eor	r2,r2,r6			@ Ch(e,f,g)
1624	eor	r0,r8,r8,ror#11
1625	add	r7,r7,r2			@ h+=Ch(e,f,g)
1626#if 28==31
1627	and	r12,r12,#0xff
1628	cmp	r12,#0xf2			@ done?
1629#endif
1630#if 28<15
1631# if __ARM_ARCH__>=7
1632	ldr	r2,[r1],#4			@ prefetch
1633# else
1634	ldrb	r2,[r1,#3]
1635# endif
1636	eor	r12,r8,r9			@ a^b, b^c in next round
1637#else
1638	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1639	eor	r12,r8,r9			@ a^b, b^c in next round
1640	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1641#endif
1642	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1643	and	r3,r3,r12			@ (b^c)&=(a^b)
1644	add	r11,r11,r7			@ d+=h
1645	eor	r3,r3,r9			@ Maj(a,b,c)
1646	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1647	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1648	@ ldr	r2,[sp,#14*4]		@ 29
1649	@ ldr	r1,[sp,#11*4]
1650	mov	r0,r2,ror#7
1651	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1652	mov	r3,r1,ror#17
1653	eor	r0,r0,r2,ror#18
1654	eor	r3,r3,r1,ror#19
1655	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1656	ldr	r2,[sp,#13*4]
1657	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1658	ldr	r1,[sp,#6*4]
1659
1660	add	r3,r3,r0
1661	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1662	add	r2,r2,r3
1663	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1664	add	r2,r2,r1			@ X[i]
1665	ldr	r3,[r14],#4			@ *K256++
1666	add	r6,r6,r2			@ h+=X[i]
1667	str	r2,[sp,#13*4]
1668	eor	r2,r4,r5
1669	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1670	and	r2,r2,r11
1671	add	r6,r6,r3			@ h+=K256[i]
1672	eor	r2,r2,r5			@ Ch(e,f,g)
1673	eor	r0,r7,r7,ror#11
1674	add	r6,r6,r2			@ h+=Ch(e,f,g)
1675#if 29==31
1676	and	r3,r3,#0xff
1677	cmp	r3,#0xf2			@ done?
1678#endif
1679#if 29<15
1680# if __ARM_ARCH__>=7
1681	ldr	r2,[r1],#4			@ prefetch
1682# else
1683	ldrb	r2,[r1,#3]
1684# endif
1685	eor	r3,r7,r8			@ a^b, b^c in next round
1686#else
1687	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1688	eor	r3,r7,r8			@ a^b, b^c in next round
1689	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1690#endif
1691	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1692	and	r12,r12,r3			@ (b^c)&=(a^b)
1693	add	r10,r10,r6			@ d+=h
1694	eor	r12,r12,r8			@ Maj(a,b,c)
1695	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1696	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1697	@ ldr	r2,[sp,#15*4]		@ 30
1698	@ ldr	r1,[sp,#12*4]
1699	mov	r0,r2,ror#7
1700	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1701	mov	r12,r1,ror#17
1702	eor	r0,r0,r2,ror#18
1703	eor	r12,r12,r1,ror#19
1704	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1705	ldr	r2,[sp,#14*4]
1706	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1707	ldr	r1,[sp,#7*4]
1708
1709	add	r12,r12,r0
1710	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1711	add	r2,r2,r12
1712	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1713	add	r2,r2,r1			@ X[i]
1714	ldr	r12,[r14],#4			@ *K256++
1715	add	r5,r5,r2			@ h+=X[i]
1716	str	r2,[sp,#14*4]
1717	eor	r2,r11,r4
1718	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1719	and	r2,r2,r10
1720	add	r5,r5,r12			@ h+=K256[i]
1721	eor	r2,r2,r4			@ Ch(e,f,g)
1722	eor	r0,r6,r6,ror#11
1723	add	r5,r5,r2			@ h+=Ch(e,f,g)
1724#if 30==31
1725	and	r12,r12,#0xff
1726	cmp	r12,#0xf2			@ done?
1727#endif
1728#if 30<15
1729# if __ARM_ARCH__>=7
1730	ldr	r2,[r1],#4			@ prefetch
1731# else
1732	ldrb	r2,[r1,#3]
1733# endif
1734	eor	r12,r6,r7			@ a^b, b^c in next round
1735#else
1736	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1737	eor	r12,r6,r7			@ a^b, b^c in next round
1738	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1739#endif
1740	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1741	and	r3,r3,r12			@ (b^c)&=(a^b)
1742	add	r9,r9,r5			@ d+=h
1743	eor	r3,r3,r7			@ Maj(a,b,c)
1744	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1745	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1746	@ ldr	r2,[sp,#0*4]		@ 31
1747	@ ldr	r1,[sp,#13*4]
1748	mov	r0,r2,ror#7
1749	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1750	mov	r3,r1,ror#17
1751	eor	r0,r0,r2,ror#18
1752	eor	r3,r3,r1,ror#19
1753	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1754	ldr	r2,[sp,#15*4]
1755	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1756	ldr	r1,[sp,#8*4]
1757
1758	add	r3,r3,r0
1759	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1760	add	r2,r2,r3
1761	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1762	add	r2,r2,r1			@ X[i]
1763	ldr	r3,[r14],#4			@ *K256++
1764	add	r4,r4,r2			@ h+=X[i]
1765	str	r2,[sp,#15*4]
1766	eor	r2,r10,r11
1767	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1768	and	r2,r2,r9
1769	add	r4,r4,r3			@ h+=K256[i]
1770	eor	r2,r2,r11			@ Ch(e,f,g)
1771	eor	r0,r5,r5,ror#11
1772	add	r4,r4,r2			@ h+=Ch(e,f,g)
1773#if 31==31
1774	and	r3,r3,#0xff
1775	cmp	r3,#0xf2			@ done?
1776#endif
1777#if 31<15
1778# if __ARM_ARCH__>=7
1779	ldr	r2,[r1],#4			@ prefetch
1780# else
1781	ldrb	r2,[r1,#3]
1782# endif
1783	eor	r3,r5,r6			@ a^b, b^c in next round
1784#else
1785	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1786	eor	r3,r5,r6			@ a^b, b^c in next round
1787	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1788#endif
1789	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1790	and	r12,r12,r3			@ (b^c)&=(a^b)
1791	add	r8,r8,r4			@ d+=h
1792	eor	r12,r12,r6			@ Maj(a,b,c)
1793	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1794	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1795#if __ARM_ARCH__>=7
1796	ite	eq			@ Thumb2 thing, sanity check in ARM
1797#endif
1798	ldreq	r3,[sp,#16*4]		@ pull ctx
1799	bne	.Lrounds_16_xx
1800
1801	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1802	ldr	r0,[r3,#0]
1803	ldr	r2,[r3,#4]
1804	ldr	r12,[r3,#8]
1805	add	r4,r4,r0
1806	ldr	r0,[r3,#12]
1807	add	r5,r5,r2
1808	ldr	r2,[r3,#16]
1809	add	r6,r6,r12
1810	ldr	r12,[r3,#20]
1811	add	r7,r7,r0
1812	ldr	r0,[r3,#24]
1813	add	r8,r8,r2
1814	ldr	r2,[r3,#28]
1815	add	r9,r9,r12
1816	ldr	r1,[sp,#17*4]		@ pull inp
1817	ldr	r12,[sp,#18*4]		@ pull inp+len
1818	add	r10,r10,r0
1819	add	r11,r11,r2
1820	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1821	cmp	r1,r12
1822	sub	r14,r14,#256	@ rewind Ktbl
1823	bne	.Loop
1824
1825	add	sp,sp,#19*4	@ destroy frame
1826#if __ARM_ARCH__>=5
1827	ldmia	sp!,{r4-r11,pc}
1828#else
1829	ldmia	sp!,{r4-r11,lr}
1830	tst	lr,#1
1831	moveq	pc,lr			@ be binary compatible with V4, yet
1832	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1833#endif
1834.size	sha256_block_data_order,.-sha256_block_data_order
1835#if __ARM_MAX_ARCH__>=7
1836.arch	armv7-a
1837.fpu	neon
1838
1839.global	sha256_block_data_order_neon
1840.type	sha256_block_data_order_neon,%function
1841.align	4
1842sha256_block_data_order_neon:
1843.LNEON:
1844	stmdb	sp!,{r4-r12,lr}
1845
1846	sub	r11,sp,#16*4+16
1847	adr	r14,K256
1848	bic	r11,r11,#15		@ align for 128-bit stores
1849	mov	r12,sp
1850	mov	sp,r11			@ alloca
1851	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1852
1853	vld1.8		{q0},[r1]!
1854	vld1.8		{q1},[r1]!
1855	vld1.8		{q2},[r1]!
1856	vld1.8		{q3},[r1]!
1857	vld1.32		{q8},[r14,:128]!
1858	vld1.32		{q9},[r14,:128]!
1859	vld1.32		{q10},[r14,:128]!
1860	vld1.32		{q11},[r14,:128]!
1861	vrev32.8	q0,q0		@ yes, even on
1862	str		r0,[sp,#64]
1863	vrev32.8	q1,q1		@ big-endian
1864	str		r1,[sp,#68]
1865	mov		r1,sp
1866	vrev32.8	q2,q2
1867	str		r2,[sp,#72]
1868	vrev32.8	q3,q3
1869	str		r12,[sp,#76]		@ save original sp
1870	vadd.i32	q8,q8,q0
1871	vadd.i32	q9,q9,q1
1872	vst1.32		{q8},[r1,:128]!
1873	vadd.i32	q10,q10,q2
1874	vst1.32		{q9},[r1,:128]!
1875	vadd.i32	q11,q11,q3
1876	vst1.32		{q10},[r1,:128]!
1877	vst1.32		{q11},[r1,:128]!
1878
1879	ldmia		r0,{r4-r11}
1880	sub		r1,r1,#64
1881	ldr		r2,[sp,#0]
1882	eor		r12,r12,r12
1883	eor		r3,r5,r6
1884	b		.L_00_48
1885
1886.align	4
1887.L_00_48:
1888	vext.8	q8,q0,q1,#4
1889	add	r11,r11,r2
1890	eor	r2,r9,r10
1891	eor	r0,r8,r8,ror#5
1892	vext.8	q9,q2,q3,#4
1893	add	r4,r4,r12
1894	and	r2,r2,r8
1895	eor	r12,r0,r8,ror#19
1896	vshr.u32	q10,q8,#7
1897	eor	r0,r4,r4,ror#11
1898	eor	r2,r2,r10
1899	vadd.i32	q0,q0,q9
1900	add	r11,r11,r12,ror#6
1901	eor	r12,r4,r5
1902	vshr.u32	q9,q8,#3
1903	eor	r0,r0,r4,ror#20
1904	add	r11,r11,r2
1905	vsli.32	q10,q8,#25
1906	ldr	r2,[sp,#4]
1907	and	r3,r3,r12
1908	vshr.u32	q11,q8,#18
1909	add	r7,r7,r11
1910	add	r11,r11,r0,ror#2
1911	eor	r3,r3,r5
1912	veor	q9,q9,q10
1913	add	r10,r10,r2
1914	vsli.32	q11,q8,#14
1915	eor	r2,r8,r9
1916	eor	r0,r7,r7,ror#5
1917	vshr.u32	d24,d7,#17
1918	add	r11,r11,r3
1919	and	r2,r2,r7
1920	veor	q9,q9,q11
1921	eor	r3,r0,r7,ror#19
1922	eor	r0,r11,r11,ror#11
1923	vsli.32	d24,d7,#15
1924	eor	r2,r2,r9
1925	add	r10,r10,r3,ror#6
1926	vshr.u32	d25,d7,#10
1927	eor	r3,r11,r4
1928	eor	r0,r0,r11,ror#20
1929	vadd.i32	q0,q0,q9
1930	add	r10,r10,r2
1931	ldr	r2,[sp,#8]
1932	veor	d25,d25,d24
1933	and	r12,r12,r3
1934	add	r6,r6,r10
1935	vshr.u32	d24,d7,#19
1936	add	r10,r10,r0,ror#2
1937	eor	r12,r12,r4
1938	vsli.32	d24,d7,#13
1939	add	r9,r9,r2
1940	eor	r2,r7,r8
1941	veor	d25,d25,d24
1942	eor	r0,r6,r6,ror#5
1943	add	r10,r10,r12
1944	vadd.i32	d0,d0,d25
1945	and	r2,r2,r6
1946	eor	r12,r0,r6,ror#19
1947	vshr.u32	d24,d0,#17
1948	eor	r0,r10,r10,ror#11
1949	eor	r2,r2,r8
1950	vsli.32	d24,d0,#15
1951	add	r9,r9,r12,ror#6
1952	eor	r12,r10,r11
1953	vshr.u32	d25,d0,#10
1954	eor	r0,r0,r10,ror#20
1955	add	r9,r9,r2
1956	veor	d25,d25,d24
1957	ldr	r2,[sp,#12]
1958	and	r3,r3,r12
1959	vshr.u32	d24,d0,#19
1960	add	r5,r5,r9
1961	add	r9,r9,r0,ror#2
1962	eor	r3,r3,r11
1963	vld1.32	{q8},[r14,:128]!
1964	add	r8,r8,r2
1965	vsli.32	d24,d0,#13
1966	eor	r2,r6,r7
1967	eor	r0,r5,r5,ror#5
1968	veor	d25,d25,d24
1969	add	r9,r9,r3
1970	and	r2,r2,r5
1971	vadd.i32	d1,d1,d25
1972	eor	r3,r0,r5,ror#19
1973	eor	r0,r9,r9,ror#11
1974	vadd.i32	q8,q8,q0
1975	eor	r2,r2,r7
1976	add	r8,r8,r3,ror#6
1977	eor	r3,r9,r10
1978	eor	r0,r0,r9,ror#20
1979	add	r8,r8,r2
1980	ldr	r2,[sp,#16]
1981	and	r12,r12,r3
1982	add	r4,r4,r8
1983	vst1.32	{q8},[r1,:128]!
1984	add	r8,r8,r0,ror#2
1985	eor	r12,r12,r10
1986	vext.8	q8,q1,q2,#4
1987	add	r7,r7,r2
1988	eor	r2,r5,r6
1989	eor	r0,r4,r4,ror#5
1990	vext.8	q9,q3,q0,#4
1991	add	r8,r8,r12
1992	and	r2,r2,r4
1993	eor	r12,r0,r4,ror#19
1994	vshr.u32	q10,q8,#7
1995	eor	r0,r8,r8,ror#11
1996	eor	r2,r2,r6
1997	vadd.i32	q1,q1,q9
1998	add	r7,r7,r12,ror#6
1999	eor	r12,r8,r9
2000	vshr.u32	q9,q8,#3
2001	eor	r0,r0,r8,ror#20
2002	add	r7,r7,r2
2003	vsli.32	q10,q8,#25
2004	ldr	r2,[sp,#20]
2005	and	r3,r3,r12
2006	vshr.u32	q11,q8,#18
2007	add	r11,r11,r7
2008	add	r7,r7,r0,ror#2
2009	eor	r3,r3,r9
2010	veor	q9,q9,q10
2011	add	r6,r6,r2
2012	vsli.32	q11,q8,#14
2013	eor	r2,r4,r5
2014	eor	r0,r11,r11,ror#5
2015	vshr.u32	d24,d1,#17
2016	add	r7,r7,r3
2017	and	r2,r2,r11
2018	veor	q9,q9,q11
2019	eor	r3,r0,r11,ror#19
2020	eor	r0,r7,r7,ror#11
2021	vsli.32	d24,d1,#15
2022	eor	r2,r2,r5
2023	add	r6,r6,r3,ror#6
2024	vshr.u32	d25,d1,#10
2025	eor	r3,r7,r8
2026	eor	r0,r0,r7,ror#20
2027	vadd.i32	q1,q1,q9
2028	add	r6,r6,r2
2029	ldr	r2,[sp,#24]
2030	veor	d25,d25,d24
2031	and	r12,r12,r3
2032	add	r10,r10,r6
2033	vshr.u32	d24,d1,#19
2034	add	r6,r6,r0,ror#2
2035	eor	r12,r12,r8
2036	vsli.32	d24,d1,#13
2037	add	r5,r5,r2
2038	eor	r2,r11,r4
2039	veor	d25,d25,d24
2040	eor	r0,r10,r10,ror#5
2041	add	r6,r6,r12
2042	vadd.i32	d2,d2,d25
2043	and	r2,r2,r10
2044	eor	r12,r0,r10,ror#19
2045	vshr.u32	d24,d2,#17
2046	eor	r0,r6,r6,ror#11
2047	eor	r2,r2,r4
2048	vsli.32	d24,d2,#15
2049	add	r5,r5,r12,ror#6
2050	eor	r12,r6,r7
2051	vshr.u32	d25,d2,#10
2052	eor	r0,r0,r6,ror#20
2053	add	r5,r5,r2
2054	veor	d25,d25,d24
2055	ldr	r2,[sp,#28]
2056	and	r3,r3,r12
2057	vshr.u32	d24,d2,#19
2058	add	r9,r9,r5
2059	add	r5,r5,r0,ror#2
2060	eor	r3,r3,r7
2061	vld1.32	{q8},[r14,:128]!
2062	add	r4,r4,r2
2063	vsli.32	d24,d2,#13
2064	eor	r2,r10,r11
2065	eor	r0,r9,r9,ror#5
2066	veor	d25,d25,d24
2067	add	r5,r5,r3
2068	and	r2,r2,r9
2069	vadd.i32	d3,d3,d25
2070	eor	r3,r0,r9,ror#19
2071	eor	r0,r5,r5,ror#11
2072	vadd.i32	q8,q8,q1
2073	eor	r2,r2,r11
2074	add	r4,r4,r3,ror#6
2075	eor	r3,r5,r6
2076	eor	r0,r0,r5,ror#20
2077	add	r4,r4,r2
2078	ldr	r2,[sp,#32]
2079	and	r12,r12,r3
2080	add	r8,r8,r4
2081	vst1.32	{q8},[r1,:128]!
2082	add	r4,r4,r0,ror#2
2083	eor	r12,r12,r6
2084	vext.8	q8,q2,q3,#4
2085	add	r11,r11,r2
2086	eor	r2,r9,r10
2087	eor	r0,r8,r8,ror#5
2088	vext.8	q9,q0,q1,#4
2089	add	r4,r4,r12
2090	and	r2,r2,r8
2091	eor	r12,r0,r8,ror#19
2092	vshr.u32	q10,q8,#7
2093	eor	r0,r4,r4,ror#11
2094	eor	r2,r2,r10
2095	vadd.i32	q2,q2,q9
2096	add	r11,r11,r12,ror#6
2097	eor	r12,r4,r5
2098	vshr.u32	q9,q8,#3
2099	eor	r0,r0,r4,ror#20
2100	add	r11,r11,r2
2101	vsli.32	q10,q8,#25
2102	ldr	r2,[sp,#36]
2103	and	r3,r3,r12
2104	vshr.u32	q11,q8,#18
2105	add	r7,r7,r11
2106	add	r11,r11,r0,ror#2
2107	eor	r3,r3,r5
2108	veor	q9,q9,q10
2109	add	r10,r10,r2
2110	vsli.32	q11,q8,#14
2111	eor	r2,r8,r9
2112	eor	r0,r7,r7,ror#5
2113	vshr.u32	d24,d3,#17
2114	add	r11,r11,r3
2115	and	r2,r2,r7
2116	veor	q9,q9,q11
2117	eor	r3,r0,r7,ror#19
2118	eor	r0,r11,r11,ror#11
2119	vsli.32	d24,d3,#15
2120	eor	r2,r2,r9
2121	add	r10,r10,r3,ror#6
2122	vshr.u32	d25,d3,#10
2123	eor	r3,r11,r4
2124	eor	r0,r0,r11,ror#20
2125	vadd.i32	q2,q2,q9
2126	add	r10,r10,r2
2127	ldr	r2,[sp,#40]
2128	veor	d25,d25,d24
2129	and	r12,r12,r3
2130	add	r6,r6,r10
2131	vshr.u32	d24,d3,#19
2132	add	r10,r10,r0,ror#2
2133	eor	r12,r12,r4
2134	vsli.32	d24,d3,#13
2135	add	r9,r9,r2
2136	eor	r2,r7,r8
2137	veor	d25,d25,d24
2138	eor	r0,r6,r6,ror#5
2139	add	r10,r10,r12
2140	vadd.i32	d4,d4,d25
2141	and	r2,r2,r6
2142	eor	r12,r0,r6,ror#19
2143	vshr.u32	d24,d4,#17
2144	eor	r0,r10,r10,ror#11
2145	eor	r2,r2,r8
2146	vsli.32	d24,d4,#15
2147	add	r9,r9,r12,ror#6
2148	eor	r12,r10,r11
2149	vshr.u32	d25,d4,#10
2150	eor	r0,r0,r10,ror#20
2151	add	r9,r9,r2
2152	veor	d25,d25,d24
2153	ldr	r2,[sp,#44]
2154	and	r3,r3,r12
2155	vshr.u32	d24,d4,#19
2156	add	r5,r5,r9
2157	add	r9,r9,r0,ror#2
2158	eor	r3,r3,r11
2159	vld1.32	{q8},[r14,:128]!
2160	add	r8,r8,r2
2161	vsli.32	d24,d4,#13
2162	eor	r2,r6,r7
2163	eor	r0,r5,r5,ror#5
2164	veor	d25,d25,d24
2165	add	r9,r9,r3
2166	and	r2,r2,r5
2167	vadd.i32	d5,d5,d25
2168	eor	r3,r0,r5,ror#19
2169	eor	r0,r9,r9,ror#11
2170	vadd.i32	q8,q8,q2
2171	eor	r2,r2,r7
2172	add	r8,r8,r3,ror#6
2173	eor	r3,r9,r10
2174	eor	r0,r0,r9,ror#20
2175	add	r8,r8,r2
2176	ldr	r2,[sp,#48]
2177	and	r12,r12,r3
2178	add	r4,r4,r8
2179	vst1.32	{q8},[r1,:128]!
2180	add	r8,r8,r0,ror#2
2181	eor	r12,r12,r10
2182	vext.8	q8,q3,q0,#4
2183	add	r7,r7,r2
2184	eor	r2,r5,r6
2185	eor	r0,r4,r4,ror#5
2186	vext.8	q9,q1,q2,#4
2187	add	r8,r8,r12
2188	and	r2,r2,r4
2189	eor	r12,r0,r4,ror#19
2190	vshr.u32	q10,q8,#7
2191	eor	r0,r8,r8,ror#11
2192	eor	r2,r2,r6
2193	vadd.i32	q3,q3,q9
2194	add	r7,r7,r12,ror#6
2195	eor	r12,r8,r9
2196	vshr.u32	q9,q8,#3
2197	eor	r0,r0,r8,ror#20
2198	add	r7,r7,r2
2199	vsli.32	q10,q8,#25
2200	ldr	r2,[sp,#52]
2201	and	r3,r3,r12
2202	vshr.u32	q11,q8,#18
2203	add	r11,r11,r7
2204	add	r7,r7,r0,ror#2
2205	eor	r3,r3,r9
2206	veor	q9,q9,q10
2207	add	r6,r6,r2
2208	vsli.32	q11,q8,#14
2209	eor	r2,r4,r5
2210	eor	r0,r11,r11,ror#5
2211	vshr.u32	d24,d5,#17
2212	add	r7,r7,r3
2213	and	r2,r2,r11
2214	veor	q9,q9,q11
2215	eor	r3,r0,r11,ror#19
2216	eor	r0,r7,r7,ror#11
2217	vsli.32	d24,d5,#15
2218	eor	r2,r2,r5
2219	add	r6,r6,r3,ror#6
2220	vshr.u32	d25,d5,#10
2221	eor	r3,r7,r8
2222	eor	r0,r0,r7,ror#20
2223	vadd.i32	q3,q3,q9
2224	add	r6,r6,r2
2225	ldr	r2,[sp,#56]
2226	veor	d25,d25,d24
2227	and	r12,r12,r3
2228	add	r10,r10,r6
2229	vshr.u32	d24,d5,#19
2230	add	r6,r6,r0,ror#2
2231	eor	r12,r12,r8
2232	vsli.32	d24,d5,#13
2233	add	r5,r5,r2
2234	eor	r2,r11,r4
2235	veor	d25,d25,d24
2236	eor	r0,r10,r10,ror#5
2237	add	r6,r6,r12
2238	vadd.i32	d6,d6,d25
2239	and	r2,r2,r10
2240	eor	r12,r0,r10,ror#19
2241	vshr.u32	d24,d6,#17
2242	eor	r0,r6,r6,ror#11
2243	eor	r2,r2,r4
2244	vsli.32	d24,d6,#15
2245	add	r5,r5,r12,ror#6
2246	eor	r12,r6,r7
2247	vshr.u32	d25,d6,#10
2248	eor	r0,r0,r6,ror#20
2249	add	r5,r5,r2
2250	veor	d25,d25,d24
2251	ldr	r2,[sp,#60]
2252	and	r3,r3,r12
2253	vshr.u32	d24,d6,#19
2254	add	r9,r9,r5
2255	add	r5,r5,r0,ror#2
2256	eor	r3,r3,r7
2257	vld1.32	{q8},[r14,:128]!
2258	add	r4,r4,r2
2259	vsli.32	d24,d6,#13
2260	eor	r2,r10,r11
2261	eor	r0,r9,r9,ror#5
2262	veor	d25,d25,d24
2263	add	r5,r5,r3
2264	and	r2,r2,r9
2265	vadd.i32	d7,d7,d25
2266	eor	r3,r0,r9,ror#19
2267	eor	r0,r5,r5,ror#11
2268	vadd.i32	q8,q8,q3
2269	eor	r2,r2,r11
2270	add	r4,r4,r3,ror#6
2271	eor	r3,r5,r6
2272	eor	r0,r0,r5,ror#20
2273	add	r4,r4,r2
2274	ldr	r2,[r14]
2275	and	r12,r12,r3
2276	add	r8,r8,r4
2277	vst1.32	{q8},[r1,:128]!
2278	add	r4,r4,r0,ror#2
2279	eor	r12,r12,r6
2280	teq	r2,#0				@ check for K256 terminator
2281	ldr	r2,[sp,#0]
2282	sub	r1,r1,#64
2283	bne	.L_00_48
2284
2285	ldr		r1,[sp,#68]
2286	ldr		r0,[sp,#72]
2287	sub		r14,r14,#256	@ rewind r14
2288	teq		r1,r0
2289	it		eq
2290	subeq		r1,r1,#64		@ avoid SEGV
2291	vld1.8		{q0},[r1]!		@ load next input block
2292	vld1.8		{q1},[r1]!
2293	vld1.8		{q2},[r1]!
2294	vld1.8		{q3},[r1]!
2295	it		ne
2296	strne		r1,[sp,#68]
2297	mov		r1,sp
2298	add	r11,r11,r2
2299	eor	r2,r9,r10
2300	eor	r0,r8,r8,ror#5
2301	add	r4,r4,r12
2302	vld1.32	{q8},[r14,:128]!
2303	and	r2,r2,r8
2304	eor	r12,r0,r8,ror#19
2305	eor	r0,r4,r4,ror#11
2306	eor	r2,r2,r10
2307	vrev32.8	q0,q0
2308	add	r11,r11,r12,ror#6
2309	eor	r12,r4,r5
2310	eor	r0,r0,r4,ror#20
2311	add	r11,r11,r2
2312	vadd.i32	q8,q8,q0
2313	ldr	r2,[sp,#4]
2314	and	r3,r3,r12
2315	add	r7,r7,r11
2316	add	r11,r11,r0,ror#2
2317	eor	r3,r3,r5
2318	add	r10,r10,r2
2319	eor	r2,r8,r9
2320	eor	r0,r7,r7,ror#5
2321	add	r11,r11,r3
2322	and	r2,r2,r7
2323	eor	r3,r0,r7,ror#19
2324	eor	r0,r11,r11,ror#11
2325	eor	r2,r2,r9
2326	add	r10,r10,r3,ror#6
2327	eor	r3,r11,r4
2328	eor	r0,r0,r11,ror#20
2329	add	r10,r10,r2
2330	ldr	r2,[sp,#8]
2331	and	r12,r12,r3
2332	add	r6,r6,r10
2333	add	r10,r10,r0,ror#2
2334	eor	r12,r12,r4
2335	add	r9,r9,r2
2336	eor	r2,r7,r8
2337	eor	r0,r6,r6,ror#5
2338	add	r10,r10,r12
2339	and	r2,r2,r6
2340	eor	r12,r0,r6,ror#19
2341	eor	r0,r10,r10,ror#11
2342	eor	r2,r2,r8
2343	add	r9,r9,r12,ror#6
2344	eor	r12,r10,r11
2345	eor	r0,r0,r10,ror#20
2346	add	r9,r9,r2
2347	ldr	r2,[sp,#12]
2348	and	r3,r3,r12
2349	add	r5,r5,r9
2350	add	r9,r9,r0,ror#2
2351	eor	r3,r3,r11
2352	add	r8,r8,r2
2353	eor	r2,r6,r7
2354	eor	r0,r5,r5,ror#5
2355	add	r9,r9,r3
2356	and	r2,r2,r5
2357	eor	r3,r0,r5,ror#19
2358	eor	r0,r9,r9,ror#11
2359	eor	r2,r2,r7
2360	add	r8,r8,r3,ror#6
2361	eor	r3,r9,r10
2362	eor	r0,r0,r9,ror#20
2363	add	r8,r8,r2
2364	ldr	r2,[sp,#16]
2365	and	r12,r12,r3
2366	add	r4,r4,r8
2367	add	r8,r8,r0,ror#2
2368	eor	r12,r12,r10
2369	vst1.32	{q8},[r1,:128]!
2370	add	r7,r7,r2
2371	eor	r2,r5,r6
2372	eor	r0,r4,r4,ror#5
2373	add	r8,r8,r12
2374	vld1.32	{q8},[r14,:128]!
2375	and	r2,r2,r4
2376	eor	r12,r0,r4,ror#19
2377	eor	r0,r8,r8,ror#11
2378	eor	r2,r2,r6
2379	vrev32.8	q1,q1
2380	add	r7,r7,r12,ror#6
2381	eor	r12,r8,r9
2382	eor	r0,r0,r8,ror#20
2383	add	r7,r7,r2
2384	vadd.i32	q8,q8,q1
2385	ldr	r2,[sp,#20]
2386	and	r3,r3,r12
2387	add	r11,r11,r7
2388	add	r7,r7,r0,ror#2
2389	eor	r3,r3,r9
2390	add	r6,r6,r2
2391	eor	r2,r4,r5
2392	eor	r0,r11,r11,ror#5
2393	add	r7,r7,r3
2394	and	r2,r2,r11
2395	eor	r3,r0,r11,ror#19
2396	eor	r0,r7,r7,ror#11
2397	eor	r2,r2,r5
2398	add	r6,r6,r3,ror#6
2399	eor	r3,r7,r8
2400	eor	r0,r0,r7,ror#20
2401	add	r6,r6,r2
2402	ldr	r2,[sp,#24]
2403	and	r12,r12,r3
2404	add	r10,r10,r6
2405	add	r6,r6,r0,ror#2
2406	eor	r12,r12,r8
2407	add	r5,r5,r2
2408	eor	r2,r11,r4
2409	eor	r0,r10,r10,ror#5
2410	add	r6,r6,r12
2411	and	r2,r2,r10
2412	eor	r12,r0,r10,ror#19
2413	eor	r0,r6,r6,ror#11
2414	eor	r2,r2,r4
2415	add	r5,r5,r12,ror#6
2416	eor	r12,r6,r7
2417	eor	r0,r0,r6,ror#20
2418	add	r5,r5,r2
2419	ldr	r2,[sp,#28]
2420	and	r3,r3,r12
2421	add	r9,r9,r5
2422	add	r5,r5,r0,ror#2
2423	eor	r3,r3,r7
2424	add	r4,r4,r2
2425	eor	r2,r10,r11
2426	eor	r0,r9,r9,ror#5
2427	add	r5,r5,r3
2428	and	r2,r2,r9
2429	eor	r3,r0,r9,ror#19
2430	eor	r0,r5,r5,ror#11
2431	eor	r2,r2,r11
2432	add	r4,r4,r3,ror#6
2433	eor	r3,r5,r6
2434	eor	r0,r0,r5,ror#20
2435	add	r4,r4,r2
2436	ldr	r2,[sp,#32]
2437	and	r12,r12,r3
2438	add	r8,r8,r4
2439	add	r4,r4,r0,ror#2
2440	eor	r12,r12,r6
2441	vst1.32	{q8},[r1,:128]!
2442	add	r11,r11,r2
2443	eor	r2,r9,r10
2444	eor	r0,r8,r8,ror#5
2445	add	r4,r4,r12
2446	vld1.32	{q8},[r14,:128]!
2447	and	r2,r2,r8
2448	eor	r12,r0,r8,ror#19
2449	eor	r0,r4,r4,ror#11
2450	eor	r2,r2,r10
2451	vrev32.8	q2,q2
2452	add	r11,r11,r12,ror#6
2453	eor	r12,r4,r5
2454	eor	r0,r0,r4,ror#20
2455	add	r11,r11,r2
2456	vadd.i32	q8,q8,q2
2457	ldr	r2,[sp,#36]
2458	and	r3,r3,r12
2459	add	r7,r7,r11
2460	add	r11,r11,r0,ror#2
2461	eor	r3,r3,r5
2462	add	r10,r10,r2
2463	eor	r2,r8,r9
2464	eor	r0,r7,r7,ror#5
2465	add	r11,r11,r3
2466	and	r2,r2,r7
2467	eor	r3,r0,r7,ror#19
2468	eor	r0,r11,r11,ror#11
2469	eor	r2,r2,r9
2470	add	r10,r10,r3,ror#6
2471	eor	r3,r11,r4
2472	eor	r0,r0,r11,ror#20
2473	add	r10,r10,r2
2474	ldr	r2,[sp,#40]
2475	and	r12,r12,r3
2476	add	r6,r6,r10
2477	add	r10,r10,r0,ror#2
2478	eor	r12,r12,r4
2479	add	r9,r9,r2
2480	eor	r2,r7,r8
2481	eor	r0,r6,r6,ror#5
2482	add	r10,r10,r12
2483	and	r2,r2,r6
2484	eor	r12,r0,r6,ror#19
2485	eor	r0,r10,r10,ror#11
2486	eor	r2,r2,r8
2487	add	r9,r9,r12,ror#6
2488	eor	r12,r10,r11
2489	eor	r0,r0,r10,ror#20
2490	add	r9,r9,r2
2491	ldr	r2,[sp,#44]
2492	and	r3,r3,r12
2493	add	r5,r5,r9
2494	add	r9,r9,r0,ror#2
2495	eor	r3,r3,r11
2496	add	r8,r8,r2
2497	eor	r2,r6,r7
2498	eor	r0,r5,r5,ror#5
2499	add	r9,r9,r3
2500	and	r2,r2,r5
2501	eor	r3,r0,r5,ror#19
2502	eor	r0,r9,r9,ror#11
2503	eor	r2,r2,r7
2504	add	r8,r8,r3,ror#6
2505	eor	r3,r9,r10
2506	eor	r0,r0,r9,ror#20
2507	add	r8,r8,r2
2508	ldr	r2,[sp,#48]
2509	and	r12,r12,r3
2510	add	r4,r4,r8
2511	add	r8,r8,r0,ror#2
2512	eor	r12,r12,r10
2513	vst1.32	{q8},[r1,:128]!
2514	add	r7,r7,r2
2515	eor	r2,r5,r6
2516	eor	r0,r4,r4,ror#5
2517	add	r8,r8,r12
2518	vld1.32	{q8},[r14,:128]!
2519	and	r2,r2,r4
2520	eor	r12,r0,r4,ror#19
2521	eor	r0,r8,r8,ror#11
2522	eor	r2,r2,r6
2523	vrev32.8	q3,q3
2524	add	r7,r7,r12,ror#6
2525	eor	r12,r8,r9
2526	eor	r0,r0,r8,ror#20
2527	add	r7,r7,r2
2528	vadd.i32	q8,q8,q3
2529	ldr	r2,[sp,#52]
2530	and	r3,r3,r12
2531	add	r11,r11,r7
2532	add	r7,r7,r0,ror#2
2533	eor	r3,r3,r9
2534	add	r6,r6,r2
2535	eor	r2,r4,r5
2536	eor	r0,r11,r11,ror#5
2537	add	r7,r7,r3
2538	and	r2,r2,r11
2539	eor	r3,r0,r11,ror#19
2540	eor	r0,r7,r7,ror#11
2541	eor	r2,r2,r5
2542	add	r6,r6,r3,ror#6
2543	eor	r3,r7,r8
2544	eor	r0,r0,r7,ror#20
2545	add	r6,r6,r2
2546	ldr	r2,[sp,#56]
2547	and	r12,r12,r3
2548	add	r10,r10,r6
2549	add	r6,r6,r0,ror#2
2550	eor	r12,r12,r8
2551	add	r5,r5,r2
2552	eor	r2,r11,r4
2553	eor	r0,r10,r10,ror#5
2554	add	r6,r6,r12
2555	and	r2,r2,r10
2556	eor	r12,r0,r10,ror#19
2557	eor	r0,r6,r6,ror#11
2558	eor	r2,r2,r4
2559	add	r5,r5,r12,ror#6
2560	eor	r12,r6,r7
2561	eor	r0,r0,r6,ror#20
2562	add	r5,r5,r2
2563	ldr	r2,[sp,#60]
2564	and	r3,r3,r12
2565	add	r9,r9,r5
2566	add	r5,r5,r0,ror#2
2567	eor	r3,r3,r7
2568	add	r4,r4,r2
2569	eor	r2,r10,r11
2570	eor	r0,r9,r9,ror#5
2571	add	r5,r5,r3
2572	and	r2,r2,r9
2573	eor	r3,r0,r9,ror#19
2574	eor	r0,r5,r5,ror#11
2575	eor	r2,r2,r11
2576	add	r4,r4,r3,ror#6
2577	eor	r3,r5,r6
2578	eor	r0,r0,r5,ror#20
2579	add	r4,r4,r2
2580	ldr	r2,[sp,#64]
2581	and	r12,r12,r3
2582	add	r8,r8,r4
2583	add	r4,r4,r0,ror#2
2584	eor	r12,r12,r6
2585	vst1.32	{q8},[r1,:128]!
2586	ldr	r0,[r2,#0]
2587	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2588	ldr	r12,[r2,#4]
2589	ldr	r3,[r2,#8]
2590	ldr	r1,[r2,#12]
2591	add	r4,r4,r0			@ accumulate
2592	ldr	r0,[r2,#16]
2593	add	r5,r5,r12
2594	ldr	r12,[r2,#20]
2595	add	r6,r6,r3
2596	ldr	r3,[r2,#24]
2597	add	r7,r7,r1
2598	ldr	r1,[r2,#28]
2599	add	r8,r8,r0
2600	str	r4,[r2],#4
2601	add	r9,r9,r12
2602	str	r5,[r2],#4
2603	add	r10,r10,r3
2604	str	r6,[r2],#4
2605	add	r11,r11,r1
2606	str	r7,[r2],#4
2607	stmia	r2,{r8-r11}
2608
2609	ittte	ne
2610	movne	r1,sp
2611	ldrne	r2,[sp,#0]
2612	eorne	r12,r12,r12
2613	ldreq	sp,[sp,#76]			@ restore original sp
2614	itt	ne
2615	eorne	r3,r5,r6
2616	bne	.L_00_48
2617
2618	ldmia	sp!,{r4-r12,pc}
2619.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2620#endif
2621#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2622
2623# ifdef __thumb2__
2624#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2625# else
2626#  define INST(a,b,c,d)	.byte	a,b,c,d
2627# endif
2628
@ -----------------------------------------------------------------------
@ sha256_block_data_order_armv8(u32 state[8], const void *inp, size_t num)
@ ARMv8 Crypto Extensions code path. The sha256h/sha256h2/sha256su0/
@ sha256su1 instructions are emitted as raw opcode bytes via the INST
@ macro so the file assembles on toolchains without SHA mnemonics.
@ In:    r0 = hash state (8 words), r1 = input, r2 = number of 64-byte
@        blocks (converted to an end-of-input pointer below)
@ Regs:  q0,q1   = working state (two 4-word halves)
@        q8-q11  = 16-word message schedule
@        q12,q13 = K256 constant + schedule word sums fed to sha256h
@        q14,q15 = copy of the input state, added back per block
@        r3      = pointer walking the K256 table
@ -----------------------------------------------------------------------
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]			@ load the 8-word hash state
# ifdef __thumb2__
	adr	r3,.LARMv8
	sub	r3,r3,#.LARMv8-K256		@ Thumb-2 safe PC-relative K256 address
# else
	adrl	r3,K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

.Loop_v8:
	@ Load one 64-byte block and byte-swap it to big-endian word order.
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2			@ set Z if this is the last block
	@ Rounds 0-47: twelve 4-round quads; each consumes one schedule
	@ vector (K+W in q12/q13 alternately) and extends the schedule
	@ with sha256su0/sha256su1.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48-63: four quads, no further schedule extension needed.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Feed-forward: add the saved input state, loop while blocks remain
	@ (flags from the teq above; 'it ne' predicates bne in Thumb-2).
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]		@ store updated hash state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2770#endif
2771.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2772.align	2
2773#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2774.comm   OPENSSL_armcap_P,4,4
2775#endif
2776