#include <machine/asm.h>
.text
/*
 * ChaCha20_ctr32 -- ChaCha20 key-stream XOR for 32-bit x86 (GNU as, AT&T
 * syntax, arguments on the stack).
 *
 * Stack arguments as the code reads them (offsets are relative to %esp after
 * the four pushes of the prologue):
 *   20(%esp)  destination pointer
 *   24(%esp)  source pointer (XORed with the key stream)
 *   28(%esp)  byte count; a count of zero returns immediately
 *   32(%esp)  pointer to 8 key words
 *   36(%esp)  pointer to 4 words; word 0 is incremented once per 64-byte
 *             block, words 1..3 are used unchanged
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and the
 * flags are clobbered.
 *
 * When bit 24 of OPENSSL_ia32cap_P[0] and bit 9 of OPENSSL_ia32cap_P[1] are
 * both set, control is handed to .Lssse3_shortcut with %eax still holding the
 * address of .Lpic_point.
 * NOTE(review): those bits are presumably the FXSR and SSSE3 CPUID flags --
 * confirm against the definition of the capability vector.
 *
 * Scalar-path frame after "subl $132,%esp":
 *     0.. 60(%esp)  working state x0..x15 (4 bytes each; some words live only
 *                   in registers while the rounds run)
 *    80..108(%esp)  copy of the 8 key words      (added back into x4..x11)
 *   112..124(%esp)  copy of the 4 counter words  (added back into x12..x15)
 *   128(%esp)       round-loop counter while %ebx is used as scratch
 *   152(%esp)       destination pointer  (the caller's 20(%esp) slot)
 *   156(%esp)       source pointer       (the caller's 24(%esp) slot)
 *   160(%esp)       bytes remaining      (the caller's 28(%esp) slot)
 * The three argument slots are overwritten with running values, which cdecl
 * permits because the callee owns its incoming argument area.
 */
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax		/* byte count == 0 ? */
	je	.L000no_data
	/* call/pop pair: %eax = run-time address of .Lpic_point */
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)	/* 1<<24 in capability word 0 */
	jz	.L001x86
	testl	$512,4(%ebp)		/* 1<<9 in capability word 1 */
	jz	.L001x86
	jmp	.Lssse3_shortcut	/* stack still holds only the four pushes */
.L001x86:
	movl	32(%esp),%esi		/* key pointer */
	movl	36(%esp),%edi		/* counter pointer */
	subl	$132,%esp
	/* copy key words 0..3 to 80..92(%esp) */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	/* copy key words 4..7 to 96..108(%esp) */
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	/* copy the 4 counter words to 112..124(%esp) */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax			/* pre-decrement: .L002entry adds 1 before every block */
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	/* write the advanced pointers and remaining length back to their slots */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	/*
	 * Build the initial state for one block.  x0..x3 are the constants
	 * 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (the little-endian
	 * ASCII of "expand 32-byte k"); x0 is kept in %eax only.
	 */
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)		/* x5 */
	movl	%ebp,24(%esp)		/* x6 */
	movl	%ecx,40(%esp)		/* x10 */
	movl	%esi,44(%esp)		/* x11 */
	movl	%edx,52(%esp)		/* x13 */
	movl	%edi,56(%esp)		/* x14 */
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp		/* x4 stays in %ebp */
	movl	96(%esp),%ecx		/* x8 stays in %ecx */
	movl	100(%esp),%esi		/* x9 stays in %esi */
	addl	$1,%edx			/* block counter for this block; x12 stays in %edx */
	movl	%ebx,28(%esp)		/* x7 */
	movl	%edi,60(%esp)		/* x15 */
	movl	%edx,112(%esp)		/* remember the incremented counter */
	movl	$10,%ebx		/* 10 iterations, each a column round plus a diagonal round */
	jmp	.L004loop
.align	16
/*
 * One iteration = four column quarter-rounds followed by four diagonal
 * quarter-rounds, with the spills and reloads for the next quarter-round
 * interleaved into the current one.  The instruction order is a deliberate
 * schedule; do not reorder.
 * Live registers at the top: %eax=x0 %ebp=x4 %ecx=x8 %edx=x12 %esi=x9,
 * %ebx=round counter.  Every other word is in its stack slot.
 */
.L004loop:
	/* ---- column round: (x0,x4,x8,x12) ---- */
	addl	%ebp,%eax		/* x0 += x4 */
	movl	%ebx,128(%esp)		/* park the round counter; %ebx becomes scratch */
	movl	%ebp,%ebx		/* %ebx = x4 */
	xorl	%eax,%edx		/* x12 ^= x0 */
	roll	$16,%edx
	addl	%edx,%ecx		/* x8 += x12 */
	xorl	%ecx,%ebx		/* x4 ^= x8 */
	movl	52(%esp),%edi		/* preload x13 */
	roll	$12,%ebx
	movl	20(%esp),%ebp		/* preload x5 */
	addl	%ebx,%eax		/* x0 += x4 */
	xorl	%eax,%edx		/* x12 ^= x0 */
	movl	%eax,(%esp)		/* save x0 */
	roll	$8,%edx
	movl	4(%esp),%eax		/* load x1 */
	addl	%edx,%ecx		/* x8 += x12 */
	movl	%edx,48(%esp)		/* save x12 */
	xorl	%ecx,%ebx		/* x4 ^= x8 */
	/* ---- column round: (x1,x5,x9,x13) ---- */
	addl	%ebp,%eax		/* x1 += x5 */
	roll	$7,%ebx
	xorl	%eax,%edi		/* x13 ^= x1 */
	movl	%ecx,32(%esp)		/* save x8 */
	roll	$16,%edi
	movl	%ebx,16(%esp)		/* save x4 */
	addl	%edi,%esi		/* x9 += x13 */
	movl	40(%esp),%ecx		/* preload x10 */
	xorl	%esi,%ebp		/* x5 ^= x9 */
	movl	56(%esp),%edx		/* preload x14 */
	roll	$12,%ebp
	movl	24(%esp),%ebx		/* preload x6 */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)		/* save x1 */
	roll	$8,%edi
	movl	8(%esp),%eax		/* load x2 */
	addl	%edi,%esi
	movl	%edi,52(%esp)		/* save x13 */
	xorl	%esi,%ebp
	/* ---- column round: (x2,x6,x10,x14) ---- */
	addl	%ebx,%eax		/* x2 += x6 */
	roll	$7,%ebp
	xorl	%eax,%edx		/* x14 ^= x2 */
	movl	%esi,36(%esp)		/* save x9 */
	roll	$16,%edx
	movl	%ebp,20(%esp)		/* save x5 */
	addl	%edx,%ecx		/* x10 += x14 */
	movl	44(%esp),%esi		/* preload x11 */
	xorl	%ecx,%ebx		/* x6 ^= x10 */
	movl	60(%esp),%edi		/* preload x15 */
	roll	$12,%ebx
	movl	28(%esp),%ebp		/* preload x7 */
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)		/* save x2 */
	roll	$8,%edx
	movl	12(%esp),%eax		/* load x3 */
	addl	%edx,%ecx
	movl	%edx,56(%esp)		/* save x14 */
	xorl	%ecx,%ebx
	/* ---- column round: (x3,x7,x11,x15) ---- */
	addl	%ebp,%eax		/* x3 += x7 */
	roll	$7,%ebx
	xorl	%eax,%edi		/* x15 ^= x3 */
	roll	$16,%edi
	movl	%ebx,24(%esp)		/* save x6 */
	addl	%edi,%esi		/* x11 += x15 */
	xorl	%esi,%ebp		/* x7 ^= x11 */
	roll	$12,%ebp
	movl	20(%esp),%ebx		/* reload x5 for the diagonal round */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)		/* save x3 */
	roll	$8,%edi
	movl	(%esp),%eax		/* load x0 */
	addl	%edi,%esi
	movl	%edi,%edx		/* x15 moves to %edx for the next quarter-round */
	xorl	%esi,%ebp
	/* ---- diagonal round: (x0,x5,x10,x15) ---- */
	addl	%ebx,%eax		/* x0 += x5 */
	roll	$7,%ebp
	xorl	%eax,%edx		/* x15 ^= x0 */
	roll	$16,%edx
	movl	%ebp,28(%esp)		/* save x7 */
	addl	%edx,%ecx		/* x10 += x15 (x10 is still live in %ecx) */
	xorl	%ecx,%ebx		/* x5 ^= x10 */
	movl	48(%esp),%edi		/* preload x12 */
	roll	$12,%ebx
	movl	24(%esp),%ebp		/* preload x6 */
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)		/* save x0 */
	roll	$8,%edx
	movl	4(%esp),%eax		/* load x1 */
	addl	%edx,%ecx
	movl	%edx,60(%esp)		/* save x15 */
	xorl	%ecx,%ebx
	/* ---- diagonal round: (x1,x6,x11,x12) ---- */
	addl	%ebp,%eax		/* x1 += x6 */
	roll	$7,%ebx
	xorl	%eax,%edi		/* x12 ^= x1 */
	movl	%ecx,40(%esp)		/* save x10 */
	roll	$16,%edi
	movl	%ebx,20(%esp)		/* save x5 */
	addl	%edi,%esi		/* x11 += x12 (x11 is still live in %esi) */
	movl	32(%esp),%ecx		/* preload x8 */
	xorl	%esi,%ebp		/* x6 ^= x11 */
	movl	52(%esp),%edx		/* preload x13 */
	roll	$12,%ebp
	movl	28(%esp),%ebx		/* preload x7 */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)		/* save x1 */
	roll	$8,%edi
	movl	8(%esp),%eax		/* load x2 */
	addl	%edi,%esi
	movl	%edi,48(%esp)		/* save x12 */
	xorl	%esi,%ebp
	/* ---- diagonal round: (x2,x7,x8,x13) ---- */
	addl	%ebx,%eax		/* x2 += x7 */
	roll	$7,%ebp
	xorl	%eax,%edx		/* x13 ^= x2 */
	movl	%esi,44(%esp)		/* save x11 */
	roll	$16,%edx
	movl	%ebp,24(%esp)		/* save x6 */
	addl	%edx,%ecx		/* x8 += x13 */
	movl	36(%esp),%esi		/* preload x9 */
	xorl	%ecx,%ebx		/* x7 ^= x8 */
	movl	56(%esp),%edi		/* preload x14 */
	roll	$12,%ebx
	movl	16(%esp),%ebp		/* preload x4 */
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)		/* save x2 */
	roll	$8,%edx
	movl	12(%esp),%eax		/* load x3 */
	addl	%edx,%ecx
	movl	%edx,52(%esp)		/* save x13 */
	xorl	%ecx,%ebx
	/* ---- diagonal round: (x3,x4,x9,x14) ---- */
	addl	%ebp,%eax		/* x3 += x4 */
	roll	$7,%ebx
	xorl	%eax,%edi		/* x14 ^= x3 */
	roll	$16,%edi
	movl	%ebx,28(%esp)		/* save x7 */
	addl	%edi,%esi		/* x9 += x14 */
	xorl	%esi,%ebp		/* x4 ^= x9 */
	movl	48(%esp),%edx		/* reload x12 for the next iteration */
	roll	$12,%ebp
	movl	128(%esp),%ebx		/* reload the round counter */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)		/* save x3 */
	roll	$8,%edi
	movl	(%esp),%eax		/* reload x0 */
	addl	%edi,%esi
	movl	%edi,56(%esp)		/* save x14 */
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	/*
	 * Rounds done.  In registers: %eax=x0 %ebp=x4 %ecx=x8 %esi=x9 %edx=x12
	 * %edi=x14.  Add the input state back to form the key-stream block.
	 */
	movl	160(%esp),%ebx		/* bytes remaining */
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail		/* fewer than 64 bytes left: byte-wise path */
	/* full block: XOR 64 source bytes and store them, one group of words at a time */
	movl	156(%esp),%ebx		/* %ebx = source pointer */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)		/* hold output word 0; %eax is about to become the destination */
	movl	152(%esp),%eax		/* %eax = destination pointer */
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	/* words 1, 2, 3, 5, 6 */
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	/* words 7, 10, 11, 13, 15 */
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx		/* advance the source pointer */
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp		/* output word 0, held back above */
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx		/* bytes remaining */
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax		/* advance the destination pointer */
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	/*
	 * Partial final block: finish the key-stream block in 0..60(%esp), then
	 * XOR it into the output one byte at a time.  %ebx holds the remaining
	 * byte count (1..63 here, since zero never reaches this point).
	 */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp		/* %ebp = source pointer */
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx		/* %ecx = destination pointer */
	movl	%esi,44(%esp)
	xorl	%esi,%esi		/* %esi = byte index */
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al	/* source byte */
	movb	(%esp,%esi,1),%dl	/* key-stream byte */
	leal	1(%esi),%esi		/* index++ without touching flags */
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)	/* -1 compensates for the early increment */
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3 -- ChaCha20 key-stream XOR using SSE2/SSSE3 instructions,
 * one 64-byte block per iteration (GNU as, AT&T syntax, stack arguments).
 *
 * Stack arguments (offsets relative to %esp after the four pushes):
 *   20(%esp)  destination pointer            -> %edi
 *   24(%esp)  source pointer                 -> %esi
 *   28(%esp)  byte count                     -> %ecx
 *   32(%esp)  pointer to 32 bytes of key     -> %edx
 *   36(%esp)  pointer to 16 bytes of counter -> %ebx
 *
 * Two ways in:
 *   - the exported symbol ChaCha20_ssse3, which saves the callee-saved
 *     registers and returns at once when the byte count is zero;
 *   - the local label .Lssse3_shortcut, for a caller that has already pushed
 *     %ebp, %ebx, %esi, %edi (in that order) and verified a non-zero count.
 *
 * The code derives the address of .Lssse3_data itself, so it does not depend
 * on any register value at either entry.
 *
 * Frame: %esp is lowered by 524 and rounded down to a 64-byte boundary so the
 * movdqa accesses below are aligned.
 *     0..63(%esp)   the four 16-byte input rows; reused as the key-stream
 *                   buffer on the tail path
 *   512(%esp)       the %esp value to restore on exit
 * Row registers: %xmm0=a (constants) %xmm1=b %xmm2=c (key) %xmm3=d (counter);
 * %xmm4/%xmm5 are scratch, %xmm6/%xmm7 hold the byte-shuffle masks.
 * %eax, %ecx, %edx, flags and %xmm0..%xmm7 are clobbered.
 */
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* direct entry only: a zero count would make the tail loop wrap around */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.Lssse3_no_data
.Lssse3_shortcut:
	/* call/pop pair: %eax = run-time address of .Lssse3_pic_point */
	call	.Lssse3_pic_point
.Lssse3_pic_point:
	popl	%eax
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp		/* remember %esp before re-aligning it */
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lssse3_pic_point(%eax),%eax	/* %eax = &.Lssse3_data */
	movdqu	(%ebx),%xmm3		/* row d: the 16 counter bytes */
.L0081x:
	movdqa	32(%eax),%xmm0		/* row a: the four constant words */
	movdqu	(%edx),%xmm1		/* row b: key bytes 0..15 */
	movdqu	16(%edx),%xmm2		/* row c: key bytes 16..31 */
	movdqa	(%eax),%xmm6		/* pshufb mask: rotate each dword left by 16 */
	movdqa	16(%eax),%xmm7		/* pshufb mask: rotate each dword left by 8 */
	/* keep the input rows so they can be added back after the rounds */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx		/* 10 iterations, each a column round plus a diagonal round */
	jmp	.L009loop1x
.align	16
.L010outer1x:
	/* next block: reload rows a..c and add {1,0,0,0} to the saved row d */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	/* ---- column round: four quarter-rounds at once, one per dword lane ---- */
	paddd	%xmm1,%xmm0		/* a += b */
	pxor	%xmm0,%xmm3		/* d ^= a */
.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 : d <<<= 16 */
	paddd	%xmm3,%xmm2		/* c += d */
	pxor	%xmm2,%xmm1		/* b ^= c */
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1		/* b <<<= 12 */
	paddd	%xmm1,%xmm0		/* a += b */
	pxor	%xmm0,%xmm3		/* d ^= a */
.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 : d <<<= 8 */
	paddd	%xmm3,%xmm2		/* c += d */
	pxor	%xmm2,%xmm1		/* b ^= c */
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1		/* b <<<= 7 */
	/* rotate the lanes of rows b, c, d so the diagonals line up as columns */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	/* ---- diagonal round ---- */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 : d <<<= 16 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1		/* b <<<= 12 */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 : d <<<= 8 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1		/* b <<<= 7 */
	/* undo the lane rotation (note $147 and $57 are swapped relative to above) */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	/* add the saved input rows back to form the key-stream block */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail		/* fewer than 64 bytes left: byte-wise path */
	/* full block: XOR with 64 source bytes and store */
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	/*
	 * Partial final block: spill the key stream over the saved rows (they
	 * are not needed again) and XOR byte by byte.  %ecx = bytes remaining.
	 */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp		/* %ebp = byte index; the old %esp is safe at 512(%esp) */
.L013tail_loop:
	movb	(%esp,%ebp,1),%al	/* key-stream byte */
	movb	(%esi,%ebp,1),%dl	/* source byte */
	leal	1(%ebp),%ebp		/* index++ without touching flags */
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)	/* -1 compensates for the early increment */
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp		/* drop the aligned frame */
.Lssse3_no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * Constant table for the SSSE3 code.  It is deliberately left in the current
 * section: the code reaches it through an assemble-time difference between
 * two labels, which requires both labels to be in the same section.
 * Rows are 16 bytes; the byte offsets listed are the displacements applied to
 * the table base.
 */
.align	64
.Lssse3_data:
/*   0: pshufb mask, rotates every 32-bit lane left by 16 bits */
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
/*  16: pshufb mask, rotates every 32-bit lane left by 8 bits */
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
/*  32: 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k") */
.long	1634760805,857760878,2036477234,1797285236
/*  48, 64: not referenced by the code visible in this file */
.long	0,1,2,3
.long	4,4,4,4
/*  80: per-block increment added to the counter row */
.long	1,0,0,0
/*  96, 112: not referenced by the code visible in this file */
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
/* 16-byte, 4-byte-aligned common symbol holding the CPU capability words */
.comm	OPENSSL_ia32cap_P,16,4
530