1/*
2 * ====================================================================
3 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4 * project. Rights for redistribution and usage in source and binary
5 * forms are granted according to the OpenSSL license.
6 * ====================================================================
7 *
8 * sha256/512_block procedure for x86_64.
9 *
10 * 40% improvement over compiler-generated code on Opteron. On EM64T
11 * sha256 was observed to run >80% faster and sha512 - >40%. No magical
12 * tricks, just straight implementation... I really wonder why gcc
13 * [being armed with inline assembler] fails to generate as fast code.
14 * The only thing which is cool about this module is that the very
15 * same instruction sequence is used for both SHA-256 and SHA-512. In
16 * the former case the instructions operate on 32-bit operands, while in
17 * the latter - on 64-bit ones. All I had to do was get one flavor right;
18 * the other one passed the test right away:-)
19 *
20 * sha256_block runs in ~1005 cycles on Opteron, which gives you
21 * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
22 * frequency in GHz. sha512_block runs in ~1275 cycles, which results
23 * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
24 * Well, if you compare it to the IA-64 implementation, which maintains
25 * X[16] in register bank[!], tends to 4 instructions per CPU clock
26 * cycle and runs in 1003 cycles, 1275 is a very good result for a 3-way
27 * issue Opteron pipeline and X[16] maintained in memory. So that *if*
28 * there is a way to improve it, *then* the only way would be to try to
29 * offload X[16] updates to SSE unit, but that would require "deeper"
30 * loop unroll, which in turn would naturally cause size blow-up, not
31 * to mention increased complexity! And once again, only *if* it's
32 * actually possible to noticeably improve overall ILP, instruction
33 * level parallelism, on a given CPU implementation in this case.
34 *
35 * Special note on Intel EM64T. While Opteron CPU exhibits perfect
36 * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
37 * [currently available] EM64T CPUs apparently are far from it. On the
38 * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
39 * sha256_block:-( This is presumably because 64-bit shifts/rotates
40 * apparently are not atomic instructions, but implemented in microcode.
41 */
42
43/*
44 * OpenSolaris OS modifications
45 *
46 * Sun elects to use this software under the BSD license.
47 *
48 * This source originates from OpenSSL file sha512-x86_64.pl at
49 * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
50 * (presumably for future OpenSSL release 0.9.8h), with these changes:
51 *
52 * 1. Added perl "use strict" and declared variables.
53 *
54 * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
55 * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
56 *
57 * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
58 * assemblers).  Replaced the .picmeup macro with assembler code.
59 *
60 * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
61 * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
62 */
63
64/*
65 * This file was generated by a perl script (sha512-x86_64.pl) that was
66 * used to generate sha256 and sha512 variants from the same code base.
67 * The comments from the original file have been pasted above.
68 */
69
70
71#if defined(lint) || defined(__lint)
72#include <sys/stdint.h>
73#include <sha2/sha2.h>
74
/*
 * Lint-only stub for SHA512TransformBlocks().  This definition is compiled
 * only when lint or __lint is defined; otherwise the assembly routine of
 * the same name in the #else branch is built instead.  The body is
 * intentionally empty (presumably it exists only so that lint sees the
 * C prototype).
 *
 * ctx - SHA2_CTX for the hash; the assembly version skips the leading
 *       "algotype" field (adds 8 to ctx) and loads eight 64-bit state
 *       words from there.
 * in  - start of the input data; the assembly version reads it as
 *       64-bit words and byte-swaps each one.
 * num - number of blocks to process; the assembly version computes the
 *       end pointer as in + num * 16 * 8, i.e. 128 bytes per block.
 */
75/* ARGSUSED */
76void
77SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
78{
79}
80
81
82#else
83#define _ASM
84#include <sys/asm_linkage.h>
85
86ENTRY_NP(SHA512TransformBlocks)
87	push	%rbx
88	push	%rbp
89	push	%r12
90	push	%r13
91	push	%r14
92	push	%r15
93	mov	%rsp,%rbp		# copy %rsp
94	shl	$4,%rdx		# num*16
95	sub	$16*8+4*8,%rsp
96	lea	(%rsi,%rdx,8),%rdx	# inp+num*16*8
97	and	$-64,%rsp		# align stack frame
98	add	$8,%rdi		# Skip OpenSolaris field, "algotype"
99	mov	%rdi,16*8+0*8(%rsp)		# save ctx, 1st arg
100	mov	%rsi,16*8+1*8(%rsp)		# save inp, 2nd arg
101	mov	%rdx,16*8+2*8(%rsp)		# save end pointer, "3rd" arg
102	mov	%rbp,16*8+3*8(%rsp)		# save copy of %rsp
103
104	#.picmeup %rbp
105	# The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
106	# the address of the "next" instruction into the target register
107	# (%rbp).  This generates these 2 instructions:
108	lea	.Llea(%rip),%rbp
109	#nop	# .picmeup generates a nop for mod 8 alignment--not needed here
110
111.Llea:
112	lea	K512-.(%rbp),%rbp
113
114	mov	8*0(%rdi),%rax
115	mov	8*1(%rdi),%rbx
116	mov	8*2(%rdi),%rcx
117	mov	8*3(%rdi),%rdx
118	mov	8*4(%rdi),%r8
119	mov	8*5(%rdi),%r9
120	mov	8*6(%rdi),%r10
121	mov	8*7(%rdi),%r11
122	jmp	.Lloop
123
124.align	16
125.Lloop:
126	xor	%rdi,%rdi
127	mov	8*0(%rsi),%r12
128	bswap	%r12
129	mov	%r8,%r13
130	mov	%r8,%r14
131	mov	%r9,%r15
132
133	ror	$14,%r13
134	ror	$18,%r14
135	xor	%r10,%r15			# f^g
136
137	xor	%r14,%r13
138	ror	$23,%r14
139	and	%r8,%r15			# (f^g)&e
140	mov	%r12,0(%rsp)
141
142	xor	%r14,%r13			# Sigma1(e)
143	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
144	add	%r11,%r12			# T1+=h
145
146	mov	%rax,%r11
147	add	%r13,%r12			# T1+=Sigma1(e)
148
149	add	%r15,%r12			# T1+=Ch(e,f,g)
150	mov	%rax,%r13
151	mov	%rax,%r14
152
153	ror	$28,%r11
154	ror	$34,%r13
155	mov	%rax,%r15
156	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
157
158	xor	%r13,%r11
159	ror	$5,%r13
160	or	%rcx,%r14			# a|c
161
162	xor	%r13,%r11			# h=Sigma0(a)
163	and	%rcx,%r15			# a&c
164	add	%r12,%rdx			# d+=T1
165
166	and	%rbx,%r14			# (a|c)&b
167	add	%r12,%r11			# h+=T1
168
169	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
170	lea	1(%rdi),%rdi	# round++
171
172	add	%r14,%r11			# h+=Maj(a,b,c)
173	mov	8*1(%rsi),%r12
174	bswap	%r12
175	mov	%rdx,%r13
176	mov	%rdx,%r14
177	mov	%r8,%r15
178
179	ror	$14,%r13
180	ror	$18,%r14
181	xor	%r9,%r15			# f^g
182
183	xor	%r14,%r13
184	ror	$23,%r14
185	and	%rdx,%r15			# (f^g)&e
186	mov	%r12,8(%rsp)
187
188	xor	%r14,%r13			# Sigma1(e)
189	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
190	add	%r10,%r12			# T1+=h
191
192	mov	%r11,%r10
193	add	%r13,%r12			# T1+=Sigma1(e)
194
195	add	%r15,%r12			# T1+=Ch(e,f,g)
196	mov	%r11,%r13
197	mov	%r11,%r14
198
199	ror	$28,%r10
200	ror	$34,%r13
201	mov	%r11,%r15
202	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
203
204	xor	%r13,%r10
205	ror	$5,%r13
206	or	%rbx,%r14			# a|c
207
208	xor	%r13,%r10			# h=Sigma0(a)
209	and	%rbx,%r15			# a&c
210	add	%r12,%rcx			# d+=T1
211
212	and	%rax,%r14			# (a|c)&b
213	add	%r12,%r10			# h+=T1
214
215	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
216	lea	1(%rdi),%rdi	# round++
217
218	add	%r14,%r10			# h+=Maj(a,b,c)
219	mov	8*2(%rsi),%r12
220	bswap	%r12
221	mov	%rcx,%r13
222	mov	%rcx,%r14
223	mov	%rdx,%r15
224
225	ror	$14,%r13
226	ror	$18,%r14
227	xor	%r8,%r15			# f^g
228
229	xor	%r14,%r13
230	ror	$23,%r14
231	and	%rcx,%r15			# (f^g)&e
232	mov	%r12,16(%rsp)
233
234	xor	%r14,%r13			# Sigma1(e)
235	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
236	add	%r9,%r12			# T1+=h
237
238	mov	%r10,%r9
239	add	%r13,%r12			# T1+=Sigma1(e)
240
241	add	%r15,%r12			# T1+=Ch(e,f,g)
242	mov	%r10,%r13
243	mov	%r10,%r14
244
245	ror	$28,%r9
246	ror	$34,%r13
247	mov	%r10,%r15
248	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
249
250	xor	%r13,%r9
251	ror	$5,%r13
252	or	%rax,%r14			# a|c
253
254	xor	%r13,%r9			# h=Sigma0(a)
255	and	%rax,%r15			# a&c
256	add	%r12,%rbx			# d+=T1
257
258	and	%r11,%r14			# (a|c)&b
259	add	%r12,%r9			# h+=T1
260
261	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
262	lea	1(%rdi),%rdi	# round++
263
264	add	%r14,%r9			# h+=Maj(a,b,c)
265	mov	8*3(%rsi),%r12
266	bswap	%r12
267	mov	%rbx,%r13
268	mov	%rbx,%r14
269	mov	%rcx,%r15
270
271	ror	$14,%r13
272	ror	$18,%r14
273	xor	%rdx,%r15			# f^g
274
275	xor	%r14,%r13
276	ror	$23,%r14
277	and	%rbx,%r15			# (f^g)&e
278	mov	%r12,24(%rsp)
279
280	xor	%r14,%r13			# Sigma1(e)
281	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
282	add	%r8,%r12			# T1+=h
283
284	mov	%r9,%r8
285	add	%r13,%r12			# T1+=Sigma1(e)
286
287	add	%r15,%r12			# T1+=Ch(e,f,g)
288	mov	%r9,%r13
289	mov	%r9,%r14
290
291	ror	$28,%r8
292	ror	$34,%r13
293	mov	%r9,%r15
294	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
295
296	xor	%r13,%r8
297	ror	$5,%r13
298	or	%r11,%r14			# a|c
299
300	xor	%r13,%r8			# h=Sigma0(a)
301	and	%r11,%r15			# a&c
302	add	%r12,%rax			# d+=T1
303
304	and	%r10,%r14			# (a|c)&b
305	add	%r12,%r8			# h+=T1
306
307	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
308	lea	1(%rdi),%rdi	# round++
309
310	add	%r14,%r8			# h+=Maj(a,b,c)
311	mov	8*4(%rsi),%r12
312	bswap	%r12
313	mov	%rax,%r13
314	mov	%rax,%r14
315	mov	%rbx,%r15
316
317	ror	$14,%r13
318	ror	$18,%r14
319	xor	%rcx,%r15			# f^g
320
321	xor	%r14,%r13
322	ror	$23,%r14
323	and	%rax,%r15			# (f^g)&e
324	mov	%r12,32(%rsp)
325
326	xor	%r14,%r13			# Sigma1(e)
327	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
328	add	%rdx,%r12			# T1+=h
329
330	mov	%r8,%rdx
331	add	%r13,%r12			# T1+=Sigma1(e)
332
333	add	%r15,%r12			# T1+=Ch(e,f,g)
334	mov	%r8,%r13
335	mov	%r8,%r14
336
337	ror	$28,%rdx
338	ror	$34,%r13
339	mov	%r8,%r15
340	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
341
342	xor	%r13,%rdx
343	ror	$5,%r13
344	or	%r10,%r14			# a|c
345
346	xor	%r13,%rdx			# h=Sigma0(a)
347	and	%r10,%r15			# a&c
348	add	%r12,%r11			# d+=T1
349
350	and	%r9,%r14			# (a|c)&b
351	add	%r12,%rdx			# h+=T1
352
353	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
354	lea	1(%rdi),%rdi	# round++
355
356	add	%r14,%rdx			# h+=Maj(a,b,c)
357	mov	8*5(%rsi),%r12
358	bswap	%r12
359	mov	%r11,%r13
360	mov	%r11,%r14
361	mov	%rax,%r15
362
363	ror	$14,%r13
364	ror	$18,%r14
365	xor	%rbx,%r15			# f^g
366
367	xor	%r14,%r13
368	ror	$23,%r14
369	and	%r11,%r15			# (f^g)&e
370	mov	%r12,40(%rsp)
371
372	xor	%r14,%r13			# Sigma1(e)
373	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
374	add	%rcx,%r12			# T1+=h
375
376	mov	%rdx,%rcx
377	add	%r13,%r12			# T1+=Sigma1(e)
378
379	add	%r15,%r12			# T1+=Ch(e,f,g)
380	mov	%rdx,%r13
381	mov	%rdx,%r14
382
383	ror	$28,%rcx
384	ror	$34,%r13
385	mov	%rdx,%r15
386	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
387
388	xor	%r13,%rcx
389	ror	$5,%r13
390	or	%r9,%r14			# a|c
391
392	xor	%r13,%rcx			# h=Sigma0(a)
393	and	%r9,%r15			# a&c
394	add	%r12,%r10			# d+=T1
395
396	and	%r8,%r14			# (a|c)&b
397	add	%r12,%rcx			# h+=T1
398
399	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
400	lea	1(%rdi),%rdi	# round++
401
402	add	%r14,%rcx			# h+=Maj(a,b,c)
403	mov	8*6(%rsi),%r12
404	bswap	%r12
405	mov	%r10,%r13
406	mov	%r10,%r14
407	mov	%r11,%r15
408
409	ror	$14,%r13
410	ror	$18,%r14
411	xor	%rax,%r15			# f^g
412
413	xor	%r14,%r13
414	ror	$23,%r14
415	and	%r10,%r15			# (f^g)&e
416	mov	%r12,48(%rsp)
417
418	xor	%r14,%r13			# Sigma1(e)
419	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
420	add	%rbx,%r12			# T1+=h
421
422	mov	%rcx,%rbx
423	add	%r13,%r12			# T1+=Sigma1(e)
424
425	add	%r15,%r12			# T1+=Ch(e,f,g)
426	mov	%rcx,%r13
427	mov	%rcx,%r14
428
429	ror	$28,%rbx
430	ror	$34,%r13
431	mov	%rcx,%r15
432	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
433
434	xor	%r13,%rbx
435	ror	$5,%r13
436	or	%r8,%r14			# a|c
437
438	xor	%r13,%rbx			# h=Sigma0(a)
439	and	%r8,%r15			# a&c
440	add	%r12,%r9			# d+=T1
441
442	and	%rdx,%r14			# (a|c)&b
443	add	%r12,%rbx			# h+=T1
444
445	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
446	lea	1(%rdi),%rdi	# round++
447
448	add	%r14,%rbx			# h+=Maj(a,b,c)
449	mov	8*7(%rsi),%r12
450	bswap	%r12
451	mov	%r9,%r13
452	mov	%r9,%r14
453	mov	%r10,%r15
454
455	ror	$14,%r13
456	ror	$18,%r14
457	xor	%r11,%r15			# f^g
458
459	xor	%r14,%r13
460	ror	$23,%r14
461	and	%r9,%r15			# (f^g)&e
462	mov	%r12,56(%rsp)
463
464	xor	%r14,%r13			# Sigma1(e)
465	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
466	add	%rax,%r12			# T1+=h
467
468	mov	%rbx,%rax
469	add	%r13,%r12			# T1+=Sigma1(e)
470
471	add	%r15,%r12			# T1+=Ch(e,f,g)
472	mov	%rbx,%r13
473	mov	%rbx,%r14
474
475	ror	$28,%rax
476	ror	$34,%r13
477	mov	%rbx,%r15
478	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
479
480	xor	%r13,%rax
481	ror	$5,%r13
482	or	%rdx,%r14			# a|c
483
484	xor	%r13,%rax			# h=Sigma0(a)
485	and	%rdx,%r15			# a&c
486	add	%r12,%r8			# d+=T1
487
488	and	%rcx,%r14			# (a|c)&b
489	add	%r12,%rax			# h+=T1
490
491	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
492	lea	1(%rdi),%rdi	# round++
493
494	add	%r14,%rax			# h+=Maj(a,b,c)
495	mov	8*8(%rsi),%r12
496	bswap	%r12
497	mov	%r8,%r13
498	mov	%r8,%r14
499	mov	%r9,%r15
500
501	ror	$14,%r13
502	ror	$18,%r14
503	xor	%r10,%r15			# f^g
504
505	xor	%r14,%r13
506	ror	$23,%r14
507	and	%r8,%r15			# (f^g)&e
508	mov	%r12,64(%rsp)
509
510	xor	%r14,%r13			# Sigma1(e)
511	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
512	add	%r11,%r12			# T1+=h
513
514	mov	%rax,%r11
515	add	%r13,%r12			# T1+=Sigma1(e)
516
517	add	%r15,%r12			# T1+=Ch(e,f,g)
518	mov	%rax,%r13
519	mov	%rax,%r14
520
521	ror	$28,%r11
522	ror	$34,%r13
523	mov	%rax,%r15
524	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
525
526	xor	%r13,%r11
527	ror	$5,%r13
528	or	%rcx,%r14			# a|c
529
530	xor	%r13,%r11			# h=Sigma0(a)
531	and	%rcx,%r15			# a&c
532	add	%r12,%rdx			# d+=T1
533
534	and	%rbx,%r14			# (a|c)&b
535	add	%r12,%r11			# h+=T1
536
537	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
538	lea	1(%rdi),%rdi	# round++
539
540	add	%r14,%r11			# h+=Maj(a,b,c)
541	mov	8*9(%rsi),%r12
542	bswap	%r12
543	mov	%rdx,%r13
544	mov	%rdx,%r14
545	mov	%r8,%r15
546
547	ror	$14,%r13
548	ror	$18,%r14
549	xor	%r9,%r15			# f^g
550
551	xor	%r14,%r13
552	ror	$23,%r14
553	and	%rdx,%r15			# (f^g)&e
554	mov	%r12,72(%rsp)
555
556	xor	%r14,%r13			# Sigma1(e)
557	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
558	add	%r10,%r12			# T1+=h
559
560	mov	%r11,%r10
561	add	%r13,%r12			# T1+=Sigma1(e)
562
563	add	%r15,%r12			# T1+=Ch(e,f,g)
564	mov	%r11,%r13
565	mov	%r11,%r14
566
567	ror	$28,%r10
568	ror	$34,%r13
569	mov	%r11,%r15
570	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
571
572	xor	%r13,%r10
573	ror	$5,%r13
574	or	%rbx,%r14			# a|c
575
576	xor	%r13,%r10			# h=Sigma0(a)
577	and	%rbx,%r15			# a&c
578	add	%r12,%rcx			# d+=T1
579
580	and	%rax,%r14			# (a|c)&b
581	add	%r12,%r10			# h+=T1
582
583	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
584	lea	1(%rdi),%rdi	# round++
585
586	add	%r14,%r10			# h+=Maj(a,b,c)
587	mov	8*10(%rsi),%r12
588	bswap	%r12
589	mov	%rcx,%r13
590	mov	%rcx,%r14
591	mov	%rdx,%r15
592
593	ror	$14,%r13
594	ror	$18,%r14
595	xor	%r8,%r15			# f^g
596
597	xor	%r14,%r13
598	ror	$23,%r14
599	and	%rcx,%r15			# (f^g)&e
600	mov	%r12,80(%rsp)
601
602	xor	%r14,%r13			# Sigma1(e)
603	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
604	add	%r9,%r12			# T1+=h
605
606	mov	%r10,%r9
607	add	%r13,%r12			# T1+=Sigma1(e)
608
609	add	%r15,%r12			# T1+=Ch(e,f,g)
610	mov	%r10,%r13
611	mov	%r10,%r14
612
613	ror	$28,%r9
614	ror	$34,%r13
615	mov	%r10,%r15
616	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
617
618	xor	%r13,%r9
619	ror	$5,%r13
620	or	%rax,%r14			# a|c
621
622	xor	%r13,%r9			# h=Sigma0(a)
623	and	%rax,%r15			# a&c
624	add	%r12,%rbx			# d+=T1
625
626	and	%r11,%r14			# (a|c)&b
627	add	%r12,%r9			# h+=T1
628
629	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
630	lea	1(%rdi),%rdi	# round++
631
632	add	%r14,%r9			# h+=Maj(a,b,c)
633	mov	8*11(%rsi),%r12
634	bswap	%r12
635	mov	%rbx,%r13
636	mov	%rbx,%r14
637	mov	%rcx,%r15
638
639	ror	$14,%r13
640	ror	$18,%r14
641	xor	%rdx,%r15			# f^g
642
643	xor	%r14,%r13
644	ror	$23,%r14
645	and	%rbx,%r15			# (f^g)&e
646	mov	%r12,88(%rsp)
647
648	xor	%r14,%r13			# Sigma1(e)
649	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
650	add	%r8,%r12			# T1+=h
651
652	mov	%r9,%r8
653	add	%r13,%r12			# T1+=Sigma1(e)
654
655	add	%r15,%r12			# T1+=Ch(e,f,g)
656	mov	%r9,%r13
657	mov	%r9,%r14
658
659	ror	$28,%r8
660	ror	$34,%r13
661	mov	%r9,%r15
662	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
663
664	xor	%r13,%r8
665	ror	$5,%r13
666	or	%r11,%r14			# a|c
667
668	xor	%r13,%r8			# h=Sigma0(a)
669	and	%r11,%r15			# a&c
670	add	%r12,%rax			# d+=T1
671
672	and	%r10,%r14			# (a|c)&b
673	add	%r12,%r8			# h+=T1
674
675	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
676	lea	1(%rdi),%rdi	# round++
677
678	add	%r14,%r8			# h+=Maj(a,b,c)
679	mov	8*12(%rsi),%r12
680	bswap	%r12
681	mov	%rax,%r13
682	mov	%rax,%r14
683	mov	%rbx,%r15
684
685	ror	$14,%r13
686	ror	$18,%r14
687	xor	%rcx,%r15			# f^g
688
689	xor	%r14,%r13
690	ror	$23,%r14
691	and	%rax,%r15			# (f^g)&e
692	mov	%r12,96(%rsp)
693
694	xor	%r14,%r13			# Sigma1(e)
695	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
696	add	%rdx,%r12			# T1+=h
697
698	mov	%r8,%rdx
699	add	%r13,%r12			# T1+=Sigma1(e)
700
701	add	%r15,%r12			# T1+=Ch(e,f,g)
702	mov	%r8,%r13
703	mov	%r8,%r14
704
705	ror	$28,%rdx
706	ror	$34,%r13
707	mov	%r8,%r15
708	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
709
710	xor	%r13,%rdx
711	ror	$5,%r13
712	or	%r10,%r14			# a|c
713
714	xor	%r13,%rdx			# h=Sigma0(a)
715	and	%r10,%r15			# a&c
716	add	%r12,%r11			# d+=T1
717
718	and	%r9,%r14			# (a|c)&b
719	add	%r12,%rdx			# h+=T1
720
721	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
722	lea	1(%rdi),%rdi	# round++
723
724	add	%r14,%rdx			# h+=Maj(a,b,c)
725	mov	8*13(%rsi),%r12
726	bswap	%r12
727	mov	%r11,%r13
728	mov	%r11,%r14
729	mov	%rax,%r15
730
731	ror	$14,%r13
732	ror	$18,%r14
733	xor	%rbx,%r15			# f^g
734
735	xor	%r14,%r13
736	ror	$23,%r14
737	and	%r11,%r15			# (f^g)&e
738	mov	%r12,104(%rsp)
739
740	xor	%r14,%r13			# Sigma1(e)
741	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
742	add	%rcx,%r12			# T1+=h
743
744	mov	%rdx,%rcx
745	add	%r13,%r12			# T1+=Sigma1(e)
746
747	add	%r15,%r12			# T1+=Ch(e,f,g)
748	mov	%rdx,%r13
749	mov	%rdx,%r14
750
751	ror	$28,%rcx
752	ror	$34,%r13
753	mov	%rdx,%r15
754	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
755
756	xor	%r13,%rcx
757	ror	$5,%r13
758	or	%r9,%r14			# a|c
759
760	xor	%r13,%rcx			# h=Sigma0(a)
761	and	%r9,%r15			# a&c
762	add	%r12,%r10			# d+=T1
763
764	and	%r8,%r14			# (a|c)&b
765	add	%r12,%rcx			# h+=T1
766
767	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
768	lea	1(%rdi),%rdi	# round++
769
770	add	%r14,%rcx			# h+=Maj(a,b,c)
771	mov	8*14(%rsi),%r12
772	bswap	%r12
773	mov	%r10,%r13
774	mov	%r10,%r14
775	mov	%r11,%r15
776
777	ror	$14,%r13
778	ror	$18,%r14
779	xor	%rax,%r15			# f^g
780
781	xor	%r14,%r13
782	ror	$23,%r14
783	and	%r10,%r15			# (f^g)&e
784	mov	%r12,112(%rsp)
785
786	xor	%r14,%r13			# Sigma1(e)
787	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
788	add	%rbx,%r12			# T1+=h
789
790	mov	%rcx,%rbx
791	add	%r13,%r12			# T1+=Sigma1(e)
792
793	add	%r15,%r12			# T1+=Ch(e,f,g)
794	mov	%rcx,%r13
795	mov	%rcx,%r14
796
797	ror	$28,%rbx
798	ror	$34,%r13
799	mov	%rcx,%r15
800	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
801
802	xor	%r13,%rbx
803	ror	$5,%r13
804	or	%r8,%r14			# a|c
805
806	xor	%r13,%rbx			# h=Sigma0(a)
807	and	%r8,%r15			# a&c
808	add	%r12,%r9			# d+=T1
809
810	and	%rdx,%r14			# (a|c)&b
811	add	%r12,%rbx			# h+=T1
812
813	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
814	lea	1(%rdi),%rdi	# round++
815
816	add	%r14,%rbx			# h+=Maj(a,b,c)
817	mov	8*15(%rsi),%r12
818	bswap	%r12
819	mov	%r9,%r13
820	mov	%r9,%r14
821	mov	%r10,%r15
822
823	ror	$14,%r13
824	ror	$18,%r14
825	xor	%r11,%r15			# f^g
826
827	xor	%r14,%r13
828	ror	$23,%r14
829	and	%r9,%r15			# (f^g)&e
830	mov	%r12,120(%rsp)
831
832	xor	%r14,%r13			# Sigma1(e)
833	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
834	add	%rax,%r12			# T1+=h
835
836	mov	%rbx,%rax
837	add	%r13,%r12			# T1+=Sigma1(e)
838
839	add	%r15,%r12			# T1+=Ch(e,f,g)
840	mov	%rbx,%r13
841	mov	%rbx,%r14
842
843	ror	$28,%rax
844	ror	$34,%r13
845	mov	%rbx,%r15
846	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
847
848	xor	%r13,%rax
849	ror	$5,%r13
850	or	%rdx,%r14			# a|c
851
852	xor	%r13,%rax			# h=Sigma0(a)
853	and	%rdx,%r15			# a&c
854	add	%r12,%r8			# d+=T1
855
856	and	%rcx,%r14			# (a|c)&b
857	add	%r12,%rax			# h+=T1
858
859	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
860	lea	1(%rdi),%rdi	# round++
861
862	add	%r14,%rax			# h+=Maj(a,b,c)
863	jmp	.Lrounds_16_xx
864.align	16
865.Lrounds_16_xx:
866	mov	8(%rsp),%r13
867	mov	112(%rsp),%r12
868
869	mov	%r13,%r15
870
871	shr	$7,%r13
872	ror	$1,%r15
873
874	xor	%r15,%r13
875	ror	$7,%r15
876
877	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
878	mov	%r12,%r14
879
880	shr	$6,%r12
881	ror	$19,%r14
882
883	xor	%r14,%r12
884	ror	$42,%r14
885
886	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
887
888	add	%r13,%r12
889
890	add	72(%rsp),%r12
891
892	add	0(%rsp),%r12
893	mov	%r8,%r13
894	mov	%r8,%r14
895	mov	%r9,%r15
896
897	ror	$14,%r13
898	ror	$18,%r14
899	xor	%r10,%r15			# f^g
900
901	xor	%r14,%r13
902	ror	$23,%r14
903	and	%r8,%r15			# (f^g)&e
904	mov	%r12,0(%rsp)
905
906	xor	%r14,%r13			# Sigma1(e)
907	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
908	add	%r11,%r12			# T1+=h
909
910	mov	%rax,%r11
911	add	%r13,%r12			# T1+=Sigma1(e)
912
913	add	%r15,%r12			# T1+=Ch(e,f,g)
914	mov	%rax,%r13
915	mov	%rax,%r14
916
917	ror	$28,%r11
918	ror	$34,%r13
919	mov	%rax,%r15
920	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
921
922	xor	%r13,%r11
923	ror	$5,%r13
924	or	%rcx,%r14			# a|c
925
926	xor	%r13,%r11			# h=Sigma0(a)
927	and	%rcx,%r15			# a&c
928	add	%r12,%rdx			# d+=T1
929
930	and	%rbx,%r14			# (a|c)&b
931	add	%r12,%r11			# h+=T1
932
933	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
934	lea	1(%rdi),%rdi	# round++
935
936	add	%r14,%r11			# h+=Maj(a,b,c)
937	mov	16(%rsp),%r13
938	mov	120(%rsp),%r12
939
940	mov	%r13,%r15
941
942	shr	$7,%r13
943	ror	$1,%r15
944
945	xor	%r15,%r13
946	ror	$7,%r15
947
948	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
949	mov	%r12,%r14
950
951	shr	$6,%r12
952	ror	$19,%r14
953
954	xor	%r14,%r12
955	ror	$42,%r14
956
957	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
958
959	add	%r13,%r12
960
961	add	80(%rsp),%r12
962
963	add	8(%rsp),%r12
964	mov	%rdx,%r13
965	mov	%rdx,%r14
966	mov	%r8,%r15
967
968	ror	$14,%r13
969	ror	$18,%r14
970	xor	%r9,%r15			# f^g
971
972	xor	%r14,%r13
973	ror	$23,%r14
974	and	%rdx,%r15			# (f^g)&e
975	mov	%r12,8(%rsp)
976
977	xor	%r14,%r13			# Sigma1(e)
978	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
979	add	%r10,%r12			# T1+=h
980
981	mov	%r11,%r10
982	add	%r13,%r12			# T1+=Sigma1(e)
983
984	add	%r15,%r12			# T1+=Ch(e,f,g)
985	mov	%r11,%r13
986	mov	%r11,%r14
987
988	ror	$28,%r10
989	ror	$34,%r13
990	mov	%r11,%r15
991	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
992
993	xor	%r13,%r10
994	ror	$5,%r13
995	or	%rbx,%r14			# a|c
996
997	xor	%r13,%r10			# h=Sigma0(a)
998	and	%rbx,%r15			# a&c
999	add	%r12,%rcx			# d+=T1
1000
1001	and	%rax,%r14			# (a|c)&b
1002	add	%r12,%r10			# h+=T1
1003
1004	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1005	lea	1(%rdi),%rdi	# round++
1006
1007	add	%r14,%r10			# h+=Maj(a,b,c)
1008	mov	24(%rsp),%r13
1009	mov	0(%rsp),%r12
1010
1011	mov	%r13,%r15
1012
1013	shr	$7,%r13
1014	ror	$1,%r15
1015
1016	xor	%r15,%r13
1017	ror	$7,%r15
1018
1019	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1020	mov	%r12,%r14
1021
1022	shr	$6,%r12
1023	ror	$19,%r14
1024
1025	xor	%r14,%r12
1026	ror	$42,%r14
1027
1028	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1029
1030	add	%r13,%r12
1031
1032	add	88(%rsp),%r12
1033
1034	add	16(%rsp),%r12
1035	mov	%rcx,%r13
1036	mov	%rcx,%r14
1037	mov	%rdx,%r15
1038
1039	ror	$14,%r13
1040	ror	$18,%r14
1041	xor	%r8,%r15			# f^g
1042
1043	xor	%r14,%r13
1044	ror	$23,%r14
1045	and	%rcx,%r15			# (f^g)&e
1046	mov	%r12,16(%rsp)
1047
1048	xor	%r14,%r13			# Sigma1(e)
1049	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
1050	add	%r9,%r12			# T1+=h
1051
1052	mov	%r10,%r9
1053	add	%r13,%r12			# T1+=Sigma1(e)
1054
1055	add	%r15,%r12			# T1+=Ch(e,f,g)
1056	mov	%r10,%r13
1057	mov	%r10,%r14
1058
1059	ror	$28,%r9
1060	ror	$34,%r13
1061	mov	%r10,%r15
1062	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1063
1064	xor	%r13,%r9
1065	ror	$5,%r13
1066	or	%rax,%r14			# a|c
1067
1068	xor	%r13,%r9			# h=Sigma0(a)
1069	and	%rax,%r15			# a&c
1070	add	%r12,%rbx			# d+=T1
1071
1072	and	%r11,%r14			# (a|c)&b
1073	add	%r12,%r9			# h+=T1
1074
1075	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1076	lea	1(%rdi),%rdi	# round++
1077
1078	add	%r14,%r9			# h+=Maj(a,b,c)
1079	mov	32(%rsp),%r13
1080	mov	8(%rsp),%r12
1081
1082	mov	%r13,%r15
1083
1084	shr	$7,%r13
1085	ror	$1,%r15
1086
1087	xor	%r15,%r13
1088	ror	$7,%r15
1089
1090	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1091	mov	%r12,%r14
1092
1093	shr	$6,%r12
1094	ror	$19,%r14
1095
1096	xor	%r14,%r12
1097	ror	$42,%r14
1098
1099	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1100
1101	add	%r13,%r12
1102
1103	add	96(%rsp),%r12
1104
1105	add	24(%rsp),%r12
1106	mov	%rbx,%r13
1107	mov	%rbx,%r14
1108	mov	%rcx,%r15
1109
1110	ror	$14,%r13
1111	ror	$18,%r14
1112	xor	%rdx,%r15			# f^g
1113
1114	xor	%r14,%r13
1115	ror	$23,%r14
1116	and	%rbx,%r15			# (f^g)&e
1117	mov	%r12,24(%rsp)
1118
1119	xor	%r14,%r13			# Sigma1(e)
1120	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1121	add	%r8,%r12			# T1+=h
1122
1123	mov	%r9,%r8
1124	add	%r13,%r12			# T1+=Sigma1(e)
1125
1126	add	%r15,%r12			# T1+=Ch(e,f,g)
1127	mov	%r9,%r13
1128	mov	%r9,%r14
1129
1130	ror	$28,%r8
1131	ror	$34,%r13
1132	mov	%r9,%r15
1133	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1134
1135	xor	%r13,%r8
1136	ror	$5,%r13
1137	or	%r11,%r14			# a|c
1138
1139	xor	%r13,%r8			# h=Sigma0(a)
1140	and	%r11,%r15			# a&c
1141	add	%r12,%rax			# d+=T1
1142
1143	and	%r10,%r14			# (a|c)&b
1144	add	%r12,%r8			# h+=T1
1145
1146	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1147	lea	1(%rdi),%rdi	# round++
1148
1149	add	%r14,%r8			# h+=Maj(a,b,c)
1150	mov	40(%rsp),%r13
1151	mov	16(%rsp),%r12
1152
1153	mov	%r13,%r15
1154
1155	shr	$7,%r13
1156	ror	$1,%r15
1157
1158	xor	%r15,%r13
1159	ror	$7,%r15
1160
1161	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1162	mov	%r12,%r14
1163
1164	shr	$6,%r12
1165	ror	$19,%r14
1166
1167	xor	%r14,%r12
1168	ror	$42,%r14
1169
1170	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1171
1172	add	%r13,%r12
1173
1174	add	104(%rsp),%r12
1175
1176	add	32(%rsp),%r12
1177	mov	%rax,%r13
1178	mov	%rax,%r14
1179	mov	%rbx,%r15
1180
1181	ror	$14,%r13
1182	ror	$18,%r14
1183	xor	%rcx,%r15			# f^g
1184
1185	xor	%r14,%r13
1186	ror	$23,%r14
1187	and	%rax,%r15			# (f^g)&e
1188	mov	%r12,32(%rsp)
1189
1190	xor	%r14,%r13			# Sigma1(e)
1191	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1192	add	%rdx,%r12			# T1+=h
1193
1194	mov	%r8,%rdx
1195	add	%r13,%r12			# T1+=Sigma1(e)
1196
1197	add	%r15,%r12			# T1+=Ch(e,f,g)
1198	mov	%r8,%r13
1199	mov	%r8,%r14
1200
1201	ror	$28,%rdx
1202	ror	$34,%r13
1203	mov	%r8,%r15
1204	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1205
1206	xor	%r13,%rdx
1207	ror	$5,%r13
1208	or	%r10,%r14			# a|c
1209
1210	xor	%r13,%rdx			# h=Sigma0(a)
1211	and	%r10,%r15			# a&c
1212	add	%r12,%r11			# d+=T1
1213
1214	and	%r9,%r14			# (a|c)&b
1215	add	%r12,%rdx			# h+=T1
1216
1217	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1218	lea	1(%rdi),%rdi	# round++
1219
1220	add	%r14,%rdx			# h+=Maj(a,b,c)
1221	mov	48(%rsp),%r13
1222	mov	24(%rsp),%r12
1223
1224	mov	%r13,%r15
1225
1226	shr	$7,%r13
1227	ror	$1,%r15
1228
1229	xor	%r15,%r13
1230	ror	$7,%r15
1231
1232	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1233	mov	%r12,%r14
1234
1235	shr	$6,%r12
1236	ror	$19,%r14
1237
1238	xor	%r14,%r12
1239	ror	$42,%r14
1240
1241	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1242
1243	add	%r13,%r12
1244
1245	add	112(%rsp),%r12
1246
1247	add	40(%rsp),%r12
1248	mov	%r11,%r13
1249	mov	%r11,%r14
1250	mov	%rax,%r15
1251
1252	ror	$14,%r13
1253	ror	$18,%r14
1254	xor	%rbx,%r15			# f^g
1255
1256	xor	%r14,%r13
1257	ror	$23,%r14
1258	and	%r11,%r15			# (f^g)&e
1259	mov	%r12,40(%rsp)
1260
1261	xor	%r14,%r13			# Sigma1(e)
1262	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1263	add	%rcx,%r12			# T1+=h
1264
1265	mov	%rdx,%rcx
1266	add	%r13,%r12			# T1+=Sigma1(e)
1267
1268	add	%r15,%r12			# T1+=Ch(e,f,g)
1269	mov	%rdx,%r13
1270	mov	%rdx,%r14
1271
1272	ror	$28,%rcx
1273	ror	$34,%r13
1274	mov	%rdx,%r15
1275	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1276
1277	xor	%r13,%rcx
1278	ror	$5,%r13
1279	or	%r9,%r14			# a|c
1280
1281	xor	%r13,%rcx			# h=Sigma0(a)
1282	and	%r9,%r15			# a&c
1283	add	%r12,%r10			# d+=T1
1284
1285	and	%r8,%r14			# (a|c)&b
1286	add	%r12,%rcx			# h+=T1
1287
1288	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1289	lea	1(%rdi),%rdi	# round++
1290
1291	add	%r14,%rcx			# h+=Maj(a,b,c)
1292	mov	56(%rsp),%r13
1293	mov	32(%rsp),%r12
1294
1295	mov	%r13,%r15
1296
1297	shr	$7,%r13
1298	ror	$1,%r15
1299
1300	xor	%r15,%r13
1301	ror	$7,%r15
1302
1303	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1304	mov	%r12,%r14
1305
1306	shr	$6,%r12
1307	ror	$19,%r14
1308
1309	xor	%r14,%r12
1310	ror	$42,%r14
1311
1312	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1313
1314	add	%r13,%r12
1315
1316	add	120(%rsp),%r12
1317
1318	add	48(%rsp),%r12
1319	mov	%r10,%r13
1320	mov	%r10,%r14
1321	mov	%r11,%r15
1322
1323	ror	$14,%r13
1324	ror	$18,%r14
1325	xor	%rax,%r15			# f^g
1326
1327	xor	%r14,%r13
1328	ror	$23,%r14
1329	and	%r10,%r15			# (f^g)&e
1330	mov	%r12,48(%rsp)
1331
1332	xor	%r14,%r13			# Sigma1(e)
1333	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
1334	add	%rbx,%r12			# T1+=h
1335
1336	mov	%rcx,%rbx
1337	add	%r13,%r12			# T1+=Sigma1(e)
1338
1339	add	%r15,%r12			# T1+=Ch(e,f,g)
1340	mov	%rcx,%r13
1341	mov	%rcx,%r14
1342
1343	ror	$28,%rbx
1344	ror	$34,%r13
1345	mov	%rcx,%r15
1346	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1347
1348	xor	%r13,%rbx
1349	ror	$5,%r13
1350	or	%r8,%r14			# a|c
1351
1352	xor	%r13,%rbx			# h=Sigma0(a)
1353	and	%r8,%r15			# a&c
1354	add	%r12,%r9			# d+=T1
1355
1356	and	%rdx,%r14			# (a|c)&b
1357	add	%r12,%rbx			# h+=T1
1358
1359	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1360	lea	1(%rdi),%rdi	# round++
1361
1362	add	%r14,%rbx			# h+=Maj(a,b,c)
1363	mov	64(%rsp),%r13
1364	mov	40(%rsp),%r12
1365
1366	mov	%r13,%r15
1367
1368	shr	$7,%r13
1369	ror	$1,%r15
1370
1371	xor	%r15,%r13
1372	ror	$7,%r15
1373
1374	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1375	mov	%r12,%r14
1376
1377	shr	$6,%r12
1378	ror	$19,%r14
1379
1380	xor	%r14,%r12
1381	ror	$42,%r14
1382
1383	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1384
1385	add	%r13,%r12
1386
1387	add	0(%rsp),%r12
1388
1389	add	56(%rsp),%r12
1390	mov	%r9,%r13
1391	mov	%r9,%r14
1392	mov	%r10,%r15
1393
1394	ror	$14,%r13
1395	ror	$18,%r14
1396	xor	%r11,%r15			# f^g
1397
1398	xor	%r14,%r13
1399	ror	$23,%r14
1400	and	%r9,%r15			# (f^g)&e
1401	mov	%r12,56(%rsp)
1402
1403	xor	%r14,%r13			# Sigma1(e)
1404	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
1405	add	%rax,%r12			# T1+=h
1406
1407	mov	%rbx,%rax
1408	add	%r13,%r12			# T1+=Sigma1(e)
1409
1410	add	%r15,%r12			# T1+=Ch(e,f,g)
1411	mov	%rbx,%r13
1412	mov	%rbx,%r14
1413
1414	ror	$28,%rax
1415	ror	$34,%r13
1416	mov	%rbx,%r15
1417	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1418
1419	xor	%r13,%rax
1420	ror	$5,%r13
1421	or	%rdx,%r14			# a|c
1422
1423	xor	%r13,%rax			# h=Sigma0(a)
1424	and	%rdx,%r15			# a&c
1425	add	%r12,%r8			# d+=T1
1426
1427	and	%rcx,%r14			# (a|c)&b
1428	add	%r12,%rax			# h+=T1
1429
1430	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1431	lea	1(%rdi),%rdi	# round++
1432
1433	add	%r14,%rax			# h+=Maj(a,b,c)
1434	mov	72(%rsp),%r13
1435	mov	48(%rsp),%r12
1436
1437	mov	%r13,%r15
1438
1439	shr	$7,%r13
1440	ror	$1,%r15
1441
1442	xor	%r15,%r13
1443	ror	$7,%r15
1444
1445	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1446	mov	%r12,%r14
1447
1448	shr	$6,%r12
1449	ror	$19,%r14
1450
1451	xor	%r14,%r12
1452	ror	$42,%r14
1453
1454	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1455
1456	add	%r13,%r12
1457
1458	add	8(%rsp),%r12
1459
1460	add	64(%rsp),%r12
1461	mov	%r8,%r13
1462	mov	%r8,%r14
1463	mov	%r9,%r15
1464
1465	ror	$14,%r13
1466	ror	$18,%r14
1467	xor	%r10,%r15			# f^g
1468
1469	xor	%r14,%r13
1470	ror	$23,%r14
1471	and	%r8,%r15			# (f^g)&e
1472	mov	%r12,64(%rsp)
1473
1474	xor	%r14,%r13			# Sigma1(e)
1475	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
1476	add	%r11,%r12			# T1+=h
1477
1478	mov	%rax,%r11
1479	add	%r13,%r12			# T1+=Sigma1(e)
1480
1481	add	%r15,%r12			# T1+=Ch(e,f,g)
1482	mov	%rax,%r13
1483	mov	%rax,%r14
1484
1485	ror	$28,%r11
1486	ror	$34,%r13
1487	mov	%rax,%r15
1488	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1489
1490	xor	%r13,%r11
1491	ror	$5,%r13
1492	or	%rcx,%r14			# a|c
1493
1494	xor	%r13,%r11			# h=Sigma0(a)
1495	and	%rcx,%r15			# a&c
1496	add	%r12,%rdx			# d+=T1
1497
1498	and	%rbx,%r14			# (a|c)&b
1499	add	%r12,%r11			# h+=T1
1500
1501	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1502	lea	1(%rdi),%rdi	# round++
1503
1504	add	%r14,%r11			# h+=Maj(a,b,c)
1505	mov	80(%rsp),%r13
1506	mov	56(%rsp),%r12
1507
1508	mov	%r13,%r15
1509
1510	shr	$7,%r13
1511	ror	$1,%r15
1512
1513	xor	%r15,%r13
1514	ror	$7,%r15
1515
1516	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1517	mov	%r12,%r14
1518
1519	shr	$6,%r12
1520	ror	$19,%r14
1521
1522	xor	%r14,%r12
1523	ror	$42,%r14
1524
1525	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1526
1527	add	%r13,%r12
1528
1529	add	16(%rsp),%r12
1530
1531	add	72(%rsp),%r12
1532	mov	%rdx,%r13
1533	mov	%rdx,%r14
1534	mov	%r8,%r15
1535
1536	ror	$14,%r13
1537	ror	$18,%r14
1538	xor	%r9,%r15			# f^g
1539
1540	xor	%r14,%r13
1541	ror	$23,%r14
1542	and	%rdx,%r15			# (f^g)&e
1543	mov	%r12,72(%rsp)
1544
1545	xor	%r14,%r13			# Sigma1(e)
1546	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
1547	add	%r10,%r12			# T1+=h
1548
1549	mov	%r11,%r10
1550	add	%r13,%r12			# T1+=Sigma1(e)
1551
1552	add	%r15,%r12			# T1+=Ch(e,f,g)
1553	mov	%r11,%r13
1554	mov	%r11,%r14
1555
1556	ror	$28,%r10
1557	ror	$34,%r13
1558	mov	%r11,%r15
1559	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1560
1561	xor	%r13,%r10
1562	ror	$5,%r13
1563	or	%rbx,%r14			# a|c
1564
1565	xor	%r13,%r10			# h=Sigma0(a)
1566	and	%rbx,%r15			# a&c
1567	add	%r12,%rcx			# d+=T1
1568
1569	and	%rax,%r14			# (a|c)&b
1570	add	%r12,%r10			# h+=T1
1571
1572	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1573	lea	1(%rdi),%rdi	# round++
1574
1575	add	%r14,%r10			# h+=Maj(a,b,c)
1576	mov	88(%rsp),%r13
1577	mov	64(%rsp),%r12
1578
1579	mov	%r13,%r15
1580
1581	shr	$7,%r13
1582	ror	$1,%r15
1583
1584	xor	%r15,%r13
1585	ror	$7,%r15
1586
1587	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1588	mov	%r12,%r14
1589
1590	shr	$6,%r12
1591	ror	$19,%r14
1592
1593	xor	%r14,%r12
1594	ror	$42,%r14
1595
1596	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1597
1598	add	%r13,%r12
1599
1600	add	24(%rsp),%r12
1601
1602	add	80(%rsp),%r12
1603	mov	%rcx,%r13
1604	mov	%rcx,%r14
1605	mov	%rdx,%r15
1606
1607	ror	$14,%r13
1608	ror	$18,%r14
1609	xor	%r8,%r15			# f^g
1610
1611	xor	%r14,%r13
1612	ror	$23,%r14
1613	and	%rcx,%r15			# (f^g)&e
1614	mov	%r12,80(%rsp)
1615
1616	xor	%r14,%r13			# Sigma1(e)
1617	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
1618	add	%r9,%r12			# T1+=h
1619
1620	mov	%r10,%r9
1621	add	%r13,%r12			# T1+=Sigma1(e)
1622
1623	add	%r15,%r12			# T1+=Ch(e,f,g)
1624	mov	%r10,%r13
1625	mov	%r10,%r14
1626
1627	ror	$28,%r9
1628	ror	$34,%r13
1629	mov	%r10,%r15
1630	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1631
1632	xor	%r13,%r9
1633	ror	$5,%r13
1634	or	%rax,%r14			# a|c
1635
1636	xor	%r13,%r9			# h=Sigma0(a)
1637	and	%rax,%r15			# a&c
1638	add	%r12,%rbx			# d+=T1
1639
1640	and	%r11,%r14			# (a|c)&b
1641	add	%r12,%r9			# h+=T1
1642
1643	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1644	lea	1(%rdi),%rdi	# round++
1645
1646	add	%r14,%r9			# h+=Maj(a,b,c)
1647	mov	96(%rsp),%r13
1648	mov	72(%rsp),%r12
1649
1650	mov	%r13,%r15
1651
1652	shr	$7,%r13
1653	ror	$1,%r15
1654
1655	xor	%r15,%r13
1656	ror	$7,%r15
1657
1658	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1659	mov	%r12,%r14
1660
1661	shr	$6,%r12
1662	ror	$19,%r14
1663
1664	xor	%r14,%r12
1665	ror	$42,%r14
1666
1667	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1668
1669	add	%r13,%r12
1670
1671	add	32(%rsp),%r12
1672
1673	add	88(%rsp),%r12
1674	mov	%rbx,%r13
1675	mov	%rbx,%r14
1676	mov	%rcx,%r15
1677
1678	ror	$14,%r13
1679	ror	$18,%r14
1680	xor	%rdx,%r15			# f^g
1681
1682	xor	%r14,%r13
1683	ror	$23,%r14
1684	and	%rbx,%r15			# (f^g)&e
1685	mov	%r12,88(%rsp)
1686
1687	xor	%r14,%r13			# Sigma1(e)
1688	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1689	add	%r8,%r12			# T1+=h
1690
1691	mov	%r9,%r8
1692	add	%r13,%r12			# T1+=Sigma1(e)
1693
1694	add	%r15,%r12			# T1+=Ch(e,f,g)
1695	mov	%r9,%r13
1696	mov	%r9,%r14
1697
1698	ror	$28,%r8
1699	ror	$34,%r13
1700	mov	%r9,%r15
1701	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1702
1703	xor	%r13,%r8
1704	ror	$5,%r13
1705	or	%r11,%r14			# a|c
1706
1707	xor	%r13,%r8			# h=Sigma0(a)
1708	and	%r11,%r15			# a&c
1709	add	%r12,%rax			# d+=T1
1710
1711	and	%r10,%r14			# (a|c)&b
1712	add	%r12,%r8			# h+=T1
1713
1714	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1715	lea	1(%rdi),%rdi	# round++
1716
1717	add	%r14,%r8			# h+=Maj(a,b,c)
1718	mov	104(%rsp),%r13
1719	mov	80(%rsp),%r12
1720
1721	mov	%r13,%r15
1722
1723	shr	$7,%r13
1724	ror	$1,%r15
1725
1726	xor	%r15,%r13
1727	ror	$7,%r15
1728
1729	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1730	mov	%r12,%r14
1731
1732	shr	$6,%r12
1733	ror	$19,%r14
1734
1735	xor	%r14,%r12
1736	ror	$42,%r14
1737
1738	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1739
1740	add	%r13,%r12
1741
1742	add	40(%rsp),%r12
1743
1744	add	96(%rsp),%r12
1745	mov	%rax,%r13
1746	mov	%rax,%r14
1747	mov	%rbx,%r15
1748
1749	ror	$14,%r13
1750	ror	$18,%r14
1751	xor	%rcx,%r15			# f^g
1752
1753	xor	%r14,%r13
1754	ror	$23,%r14
1755	and	%rax,%r15			# (f^g)&e
1756	mov	%r12,96(%rsp)
1757
1758	xor	%r14,%r13			# Sigma1(e)
1759	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1760	add	%rdx,%r12			# T1+=h
1761
1762	mov	%r8,%rdx
1763	add	%r13,%r12			# T1+=Sigma1(e)
1764
1765	add	%r15,%r12			# T1+=Ch(e,f,g)
1766	mov	%r8,%r13
1767	mov	%r8,%r14
1768
1769	ror	$28,%rdx
1770	ror	$34,%r13
1771	mov	%r8,%r15
1772	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1773
1774	xor	%r13,%rdx
1775	ror	$5,%r13
1776	or	%r10,%r14			# a|c
1777
1778	xor	%r13,%rdx			# h=Sigma0(a)
1779	and	%r10,%r15			# a&c
1780	add	%r12,%r11			# d+=T1
1781
1782	and	%r9,%r14			# (a|c)&b
1783	add	%r12,%rdx			# h+=T1
1784
1785	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1786	lea	1(%rdi),%rdi	# round++
1787
1788	add	%r14,%rdx			# h+=Maj(a,b,c)
1789	mov	112(%rsp),%r13
1790	mov	88(%rsp),%r12
1791
1792	mov	%r13,%r15
1793
1794	shr	$7,%r13
1795	ror	$1,%r15
1796
1797	xor	%r15,%r13
1798	ror	$7,%r15
1799
1800	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1801	mov	%r12,%r14
1802
1803	shr	$6,%r12
1804	ror	$19,%r14
1805
1806	xor	%r14,%r12
1807	ror	$42,%r14
1808
1809	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1810
1811	add	%r13,%r12
1812
1813	add	48(%rsp),%r12
1814
1815	add	104(%rsp),%r12
1816	mov	%r11,%r13
1817	mov	%r11,%r14
1818	mov	%rax,%r15
1819
1820	ror	$14,%r13
1821	ror	$18,%r14
1822	xor	%rbx,%r15			# f^g
1823
1824	xor	%r14,%r13
1825	ror	$23,%r14
1826	and	%r11,%r15			# (f^g)&e
1827	mov	%r12,104(%rsp)
1828
1829	xor	%r14,%r13			# Sigma1(e)
1830	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1831	add	%rcx,%r12			# T1+=h
1832
1833	mov	%rdx,%rcx
1834	add	%r13,%r12			# T1+=Sigma1(e)
1835
1836	add	%r15,%r12			# T1+=Ch(e,f,g)
1837	mov	%rdx,%r13
1838	mov	%rdx,%r14
1839
1840	ror	$28,%rcx
1841	ror	$34,%r13
1842	mov	%rdx,%r15
1843	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1844
1845	xor	%r13,%rcx
1846	ror	$5,%r13
1847	or	%r9,%r14			# a|c
1848
1849	xor	%r13,%rcx			# h=Sigma0(a)
1850	and	%r9,%r15			# a&c
1851	add	%r12,%r10			# d+=T1
1852
1853	and	%r8,%r14			# (a|c)&b
1854	add	%r12,%rcx			# h+=T1
1855
1856	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1857	lea	1(%rdi),%rdi	# round++
1858
1859	add	%r14,%rcx			# h+=Maj(a,b,c)
1860	mov	120(%rsp),%r13
1861	mov	96(%rsp),%r12
1862
1863	mov	%r13,%r15
1864
1865	shr	$7,%r13
1866	ror	$1,%r15
1867
1868	xor	%r15,%r13
1869	ror	$7,%r15
1870
1871	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1872	mov	%r12,%r14
1873
1874	shr	$6,%r12
1875	ror	$19,%r14
1876
1877	xor	%r14,%r12
1878	ror	$42,%r14
1879
1880	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1881
1882	add	%r13,%r12
1883
1884	add	56(%rsp),%r12
1885
1886	add	112(%rsp),%r12
1887	mov	%r10,%r13
1888	mov	%r10,%r14
1889	mov	%r11,%r15
1890
1891	ror	$14,%r13
1892	ror	$18,%r14
1893	xor	%rax,%r15			# f^g
1894
1895	xor	%r14,%r13
1896	ror	$23,%r14
1897	and	%r10,%r15			# (f^g)&e
1898	mov	%r12,112(%rsp)
1899
1900	xor	%r14,%r13			# Sigma1(e)
1901	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
1902	add	%rbx,%r12			# T1+=h
1903
1904	mov	%rcx,%rbx
1905	add	%r13,%r12			# T1+=Sigma1(e)
1906
1907	add	%r15,%r12			# T1+=Ch(e,f,g)
1908	mov	%rcx,%r13
1909	mov	%rcx,%r14
1910
1911	ror	$28,%rbx
1912	ror	$34,%r13
1913	mov	%rcx,%r15
1914	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1915
1916	xor	%r13,%rbx
1917	ror	$5,%r13
1918	or	%r8,%r14			# a|c
1919
1920	xor	%r13,%rbx			# h=Sigma0(a)
1921	and	%r8,%r15			# a&c
1922	add	%r12,%r9			# d+=T1
1923
1924	and	%rdx,%r14			# (a|c)&b
1925	add	%r12,%rbx			# h+=T1
1926
1927	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1928	lea	1(%rdi),%rdi	# round++
1929
1930	add	%r14,%rbx			# h+=Maj(a,b,c)
1931	mov	0(%rsp),%r13
1932	mov	104(%rsp),%r12
1933
1934	mov	%r13,%r15
1935
1936	shr	$7,%r13
1937	ror	$1,%r15
1938
1939	xor	%r15,%r13
1940	ror	$7,%r15
1941
1942	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1943	mov	%r12,%r14
1944
1945	shr	$6,%r12
1946	ror	$19,%r14
1947
1948	xor	%r14,%r12
1949	ror	$42,%r14
1950
1951	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1952
1953	add	%r13,%r12
1954
1955	add	64(%rsp),%r12
1956
1957	add	120(%rsp),%r12
1958	mov	%r9,%r13
1959	mov	%r9,%r14
1960	mov	%r10,%r15
1961
1962	ror	$14,%r13
1963	ror	$18,%r14
1964	xor	%r11,%r15			# f^g
1965
1966	xor	%r14,%r13
1967	ror	$23,%r14
1968	and	%r9,%r15			# (f^g)&e
1969	mov	%r12,120(%rsp)
1970
1971	xor	%r14,%r13			# Sigma1(e)
1972	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
1973	add	%rax,%r12			# T1+=h
1974
1975	mov	%rbx,%rax
1976	add	%r13,%r12			# T1+=Sigma1(e)
1977
1978	add	%r15,%r12			# T1+=Ch(e,f,g)
1979	mov	%rbx,%r13
1980	mov	%rbx,%r14
1981
1982	ror	$28,%rax
1983	ror	$34,%r13
1984	mov	%rbx,%r15
1985	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1986
1987	xor	%r13,%rax
1988	ror	$5,%r13
1989	or	%rdx,%r14			# a|c
1990
1991	xor	%r13,%rax			# h=Sigma0(a)
1992	and	%rdx,%r15			# a&c
1993	add	%r12,%r8			# d+=T1
1994
1995	and	%rcx,%r14			# (a|c)&b
1996	add	%r12,%rax			# h+=T1
1997
1998	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1999	lea	1(%rdi),%rdi	# round++
2000
2001	add	%r14,%rax			# h+=Maj(a,b,c)
2002	cmp	$80,%rdi
2003	jb	.Lrounds_16_xx
2004
2005	mov	16*8+0*8(%rsp),%rdi
2006	lea	16*8(%rsi),%rsi
2007
2008	add	8*0(%rdi),%rax
2009	add	8*1(%rdi),%rbx
2010	add	8*2(%rdi),%rcx
2011	add	8*3(%rdi),%rdx
2012	add	8*4(%rdi),%r8
2013	add	8*5(%rdi),%r9
2014	add	8*6(%rdi),%r10
2015	add	8*7(%rdi),%r11
2016
2017	cmp	16*8+2*8(%rsp),%rsi
2018
2019	mov	%rax,8*0(%rdi)
2020	mov	%rbx,8*1(%rdi)
2021	mov	%rcx,8*2(%rdi)
2022	mov	%rdx,8*3(%rdi)
2023	mov	%r8,8*4(%rdi)
2024	mov	%r9,8*5(%rdi)
2025	mov	%r10,8*6(%rdi)
2026	mov	%r11,8*7(%rdi)
2027	jb	.Lloop
2028
2029	mov	16*8+3*8(%rsp),%rsp
2030	pop	%r15
2031	pop	%r14
2032	pop	%r13
2033	pop	%r12
2034	pop	%rbp
2035	pop	%rbx
2036
2037	ret
2038SET_SIZE(SHA512TransformBlocks)
2039
/*
 * K512: table of the 80 SHA-512 round constants K[0..79] (FIPS 180-4,
 * section 4.2.3), stored as 64-bit quadwords, two per line.
 *
 * The unrolled rounds above fetch one entry per round with
 * "add (%rbp,%rdi,8),%r12", where %rdi is the round counter that runs
 * up to 80; presumably %rbp holds the address of this table (it is
 * loaded outside the code visible here -- TODO confirm).
 *
 * NOTE(review): nothing visible in this part of the file writes to the
 * table, yet it is emitted into the writable .data section; confirm
 * whether a read-only section would be acceptable.
 */
2040.data
2041.align	64	/* start the table on a 64-byte boundary */
2042.type	K512,@object
2043K512:
2044	.quad	0x428a2f98d728ae22,0x7137449123ef65cd	/* K[0..1] */
2045	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc	/* K[2..3] */
2046	.quad	0x3956c25bf348b538,0x59f111f1b605d019	/* K[4..5] */
2047	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118	/* K[6..7] */
2048	.quad	0xd807aa98a3030242,0x12835b0145706fbe	/* K[8..9] */
2049	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2	/* K[10..11] */
2050	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1	/* K[12..13] */
2051	.quad	0x9bdc06a725c71235,0xc19bf174cf692694	/* K[14..15] */
2052	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3	/* K[16..17] */
2053	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65	/* K[18..19] */
2054	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483	/* K[20..21] */
2055	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5	/* K[22..23] */
2056	.quad	0x983e5152ee66dfab,0xa831c66d2db43210	/* K[24..25] */
2057	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4	/* K[26..27] */
2058	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725	/* K[28..29] */
2059	.quad	0x06ca6351e003826f,0x142929670a0e6e70	/* K[30..31] */
2060	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926	/* K[32..33] */
2061	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df	/* K[34..35] */
2062	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8	/* K[36..37] */
2063	.quad	0x81c2c92e47edaee6,0x92722c851482353b	/* K[38..39] */
2064	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001	/* K[40..41] */
2065	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30	/* K[42..43] */
2066	.quad	0xd192e819d6ef5218,0xd69906245565a910	/* K[44..45] */
2067	.quad	0xf40e35855771202a,0x106aa07032bbd1b8	/* K[46..47] */
2068	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53	/* K[48..49] */
2069	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8	/* K[50..51] */
2070	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb	/* K[52..53] */
2071	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3	/* K[54..55] */
2072	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60	/* K[56..57] */
2073	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec	/* K[58..59] */
2074	.quad	0x90befffa23631e28,0xa4506cebde82bde9	/* K[60..61] */
2075	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b	/* K[62..63] */
2076	.quad	0xca273eceea26619c,0xd186b8c721c0c207	/* K[64..65] */
2077	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178	/* K[66..67] */
2078	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6	/* K[68..69] */
2079	.quad	0x113f9804bef90dae,0x1b710b35131c471b	/* K[70..71] */
2080	.quad	0x28db77f523047d84,0x32caab7b40c72493	/* K[72..73] */
2081	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c	/* K[74..75] */
2082	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a	/* K[76..77] */
2083	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817	/* K[78..79] */
2084#endif /* !lint && !__lint */
2085
2086#ifdef __ELF__
2087.section .note.GNU-stack,"",%progbits
2088#endif
2089