/* x25519-x86_64.S, revision 1.1 */
/* Syntax: GNU as, AT&T operand order (src, dst); preprocessed by cpp. */
#include <machine/asm.h>

.text

/*
 * x25519_fe51_mul -- h = f * g on five-limb field elements.
 *
 * In:    %rdi = h, five 64-bit words (written by .Lreduce51)
 *        %rsi = f, five 64-bit words (read)
 *        %rdx = g, five 64-bit words (read)
 * Out:   none in registers; control leaves through the shared tail
 *        .Lreduce51, which stores the result and returns.
 *
 * Limb i has weight 2^(51*i) (the shared tail carries at bit 51).
 * A product f[i]*g[j] with i+j >= 5 is folded back into limb i+j-5
 * with a factor of 19, consistent with arithmetic modulo 2^255-19.
 *
 * 128-bit accumulators (hi:lo) handed to .Lreduce51:
 *        h0 = rcx:rbx   h1 = r9:r8   h2 = r11:r10
 *        h3 = r13:r12   h4 = r15:r14
 *
 * Frame (40 bytes below the six saved registers):
 *        8*0(%rsp) = g[0]   8*1(%rsp) = g[1]   8*2(%rsp) = g[2]
 *        8*4(%rsp) = caller's %rdi (h)
 */
.globl	x25519_fe51_mul
.type	x25519_fe51_mul, @function
.align	32
x25519_fe51_mul:
	.cfi_startproc
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp, -16
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx, -24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12, -32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13, -40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14, -48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15, -56
	leaq	-8*5(%rsp), %rsp
	.cfi_adjust_cfa_offset	40
.Lfe51_mul_body:

	/* Load f[0] and all of g; %rdx is about to be clobbered by mulq. */
	movq	8*0(%rsi), %rax
	movq	8*0(%rdx), %r11
	movq	8*1(%rdx), %r12
	movq	8*2(%rdx), %r13
	movq	8*3(%rdx), %rbp
	movq	8*4(%rdx), %r14

	/* Row f[0]: initialises h0..h4; g[0..2] are spilled on the way. */
	movq	%rdi, 8*4(%rsp)			/* park h; rdi becomes scratch */
	movq	%rax, %rdi			/* rdi = f[0] */
	mulq	%r11				/* f[0]*g[0] */
	movq	%r11, 8*0(%rsp)
	movq	%rax, %rbx			/* h0 = rcx:rbx */
	movq	%rdi, %rax
	movq	%rdx, %rcx
	mulq	%r12				/* f[0]*g[1] */
	movq	%r12, 8*1(%rsp)
	movq	%rax, %r8			/* h1 = r9:r8 */
	movq	%rdi, %rax
	leaq	(%r14,%r14,8), %r15		/* r15 = 9*g[4] */
	movq	%rdx, %r9
	mulq	%r13				/* f[0]*g[2] */
	movq	%r13, 8*2(%rsp)
	movq	%rax, %r10			/* h2 = r11:r10 */
	movq	%rdi, %rax
	leaq	(%r14,%r15,2), %rdi		/* rdi = g[4] + 18*g[4] = 19*g[4] */
	movq	%rdx, %r11
	mulq	%rbp				/* f[0]*g[3] */
	movq	%rax, %r12			/* h3 = r13:r12 */
	movq	8*0(%rsi), %rax
	movq	%rdx, %r13
	mulq	%r14				/* f[0]*g[4] */
	movq	%rax, %r14			/* h4 = r15:r14 */
	movq	8*1(%rsi), %rax
	movq	%rdx, %r15

	/* Column g[4]: rdi = 19*g[4], rbp = g[3]. */
	mulq	%rdi				/* h0 += f[1]*19*g[4] */
	addq	%rax, %rbx
	movq	8*2(%rsi), %rax
	adcq	%rdx, %rcx
	mulq	%rdi				/* h1 += f[2]*19*g[4] */
	addq	%rax, %r8
	movq	8*3(%rsi), %rax
	adcq	%rdx, %r9
	mulq	%rdi				/* h2 += f[3]*19*g[4] */
	addq	%rax, %r10
	movq	8*4(%rsi), %rax
	adcq	%rdx, %r11
	mulq	%rdi				/* h3 += f[4]*19*g[4] */
	imulq	$19, %rbp, %rdi			/* rdi = 19*g[3] */
	addq	%rax, %r12
	movq	8*1(%rsi), %rax
	adcq	%rdx, %r13
	mulq	%rbp				/* h4 += f[1]*g[3] */
	movq	8*2(%rsp), %rbp			/* rbp = g[2] */
	addq	%rax, %r14
	movq	8*2(%rsi), %rax
	adcq	%rdx, %r15

	/* Column g[3]: rdi = 19*g[3], rbp = g[2]. */
	mulq	%rdi				/* h0 += f[2]*19*g[3] */
	addq	%rax, %rbx
	movq	8*3(%rsi), %rax
	adcq	%rdx, %rcx
	mulq	%rdi				/* h1 += f[3]*19*g[3] */
	addq	%rax, %r8
	movq	8*4(%rsi), %rax
	adcq	%rdx, %r9
	mulq	%rdi				/* h2 += f[4]*19*g[3] */
	imulq	$19, %rbp, %rdi			/* rdi = 19*g[2] */
	addq	%rax, %r10
	movq	8*1(%rsi), %rax
	adcq	%rdx, %r11
	mulq	%rbp				/* h3 += f[1]*g[2] */
	addq	%rax, %r12
	movq	8*2(%rsi), %rax
	adcq	%rdx, %r13
	mulq	%rbp				/* h4 += f[2]*g[2] */
	movq	8*1(%rsp), %rbp			/* rbp = g[1] */
	addq	%rax, %r14
	movq	8*3(%rsi), %rax
	adcq	%rdx, %r15

	/* Column g[2]: rdi = 19*g[2], rbp = g[1]. */
	mulq	%rdi				/* h0 += f[3]*19*g[2] */
	addq	%rax, %rbx
	movq	8*4(%rsi), %rax
	adcq	%rdx, %rcx
	mulq	%rdi				/* h1 += f[4]*19*g[2] */
	addq	%rax, %r8
	movq	8*1(%rsi), %rax
	adcq	%rdx, %r9
	mulq	%rbp				/* h2 += f[1]*g[1] */
	imulq	$19, %rbp, %rdi			/* rdi = 19*g[1] */
	addq	%rax, %r10
	movq	8*2(%rsi), %rax
	adcq	%rdx, %r11
	mulq	%rbp				/* h3 += f[2]*g[1] */
	addq	%rax, %r12
	movq	8*3(%rsi), %rax
	adcq	%rdx, %r13
	mulq	%rbp				/* h4 += f[3]*g[1] */
	movq	8*0(%rsp), %rbp			/* rbp = g[0] */
	addq	%rax, %r14
	movq	8*4(%rsi), %rax
	adcq	%rdx, %r15

	/* Column g[1] wrap-around term, then column g[0]. */
	mulq	%rdi				/* h0 += f[4]*19*g[1] */
	addq	%rax, %rbx
	movq	8*1(%rsi), %rax
	adcq	%rdx, %rcx
	mulq	%rbp				/* h1 += f[1]*g[0] */
	addq	%rax, %r8
	movq	8*2(%rsi), %rax
	adcq	%rdx, %r9
	mulq	%rbp				/* h2 += f[2]*g[0] */
	addq	%rax, %r10
	movq	8*3(%rsi), %rax
	adcq	%rdx, %r11
	mulq	%rbp				/* h3 += f[3]*g[0] */
	addq	%rax, %r12
	movq	8*4(%rsi), %rax
	adcq	%rdx, %r13
	mulq	%rbp				/* h4 += f[4]*g[0] */
	addq	%rax, %r14
	adcq	%rdx, %r15

	movq	8*4(%rsp), %rdi			/* rdi = h again for the store */
	jmp	.Lreduce51			/* shared carry/store/epilogue */
.Lfe51_mul_epilogue:
	.cfi_endproc
.size	x25519_fe51_mul, .-x25519_fe51_mul

/*
 * x25519_fe51_sqr -- h = f * f on five-limb field elements.
 *
 * In:    %rdi = h, five 64-bit words (written)
 *        %rsi = f, five 64-bit words (read)
 * Out:   none in registers.
 *
 * Limb i has weight 2^(51*i).  Cross terms are computed once against a
 * doubled operand; terms whose limb index would be >= 5 are folded back
 * with a factor of 19, consistent with arithmetic modulo 2^255-19.
 *
 * 128-bit accumulators (hi:lo) on entry to .Lreduce51:
 *        h0 = rcx:rbx   h1 = r9:r8   h2 = r11:r10
 *        h3 = r13:r12   h4 = r15:r14
 *
 * Frame (40 bytes below the six saved registers):
 *        8*0(%rsp) = f[2]          8*4(%rsp) = caller's %rdi (h)
 *
 * .Lreduce51 (carry propagation, store, register restore, return) is
 * also the jump target of x25519_fe51_mul and x25519_fe51_mul121666,
 * which build an identical 96-byte frame before jumping here.
 */
.globl	x25519_fe51_sqr
.type	x25519_fe51_sqr, @function
.align	32
x25519_fe51_sqr:
	.cfi_startproc
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp, -16
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx, -24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12, -32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13, -40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14, -48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15, -56
	leaq	-8*5(%rsp), %rsp
	.cfi_adjust_cfa_offset	40
.Lfe51_sqr_body:

	movq	8*0(%rsi), %rax			/* rax = f[0] */
	movq	8*2(%rsi), %r15			/* r15 = f[2] */
	movq	8*4(%rsi), %rbp			/* rbp = f[4] */

	/* Row 2*f[0]: initialises h0..h4. */
	movq	%rdi, 8*4(%rsp)			/* park h; rdi becomes scratch */
	leaq	(%rax,%rax,1), %r14		/* r14 = 2*f[0] */
	mulq	%rax				/* f[0]^2 */
	movq	%rax, %rbx			/* h0 = rcx:rbx */
	movq	8*1(%rsi), %rax
	movq	%rdx, %rcx
	mulq	%r14				/* 2*f[0]*f[1] */
	movq	%rax, %r8			/* h1 = r9:r8 */
	movq	%r15, %rax
	movq	%r15, 8*0(%rsp)			/* spill f[2]; r15 is reused below */
	movq	%rdx, %r9
	mulq	%r14				/* 2*f[0]*f[2] */
	movq	%rax, %r10			/* h2 = r11:r10 */
	movq	8*3(%rsi), %rax
	movq	%rdx, %r11
	imulq	$19, %rbp, %rdi			/* rdi = 19*f[4] */
	mulq	%r14				/* 2*f[0]*f[3] */
	movq	%rax, %r12			/* h3 = r13:r12 */
	movq	%rbp, %rax
	movq	%rdx, %r13
	mulq	%r14				/* 2*f[0]*f[4] */
	movq	%rax, %r14			/* h4 = r15:r14 */
	movq	%rbp, %rax
	movq	%rdx, %r15

	mulq	%rdi				/* h3 += f[4]*19*f[4] */
	addq	%rax, %r12
	movq	8*1(%rsi), %rax
	adcq	%rdx, %r13

	/* Row f[1]: rsi = f[3] from here on (the input pointer is dead). */
	movq	8*3(%rsi), %rsi
	leaq	(%rax,%rax,1), %rbp		/* rbp = 2*f[1] */
	mulq	%rax				/* h2 += f[1]^2 */
	addq	%rax, %r10
	movq	8*0(%rsp), %rax
	adcq	%rdx, %r11
	mulq	%rbp				/* h3 += 2*f[1]*f[2] */
	addq	%rax, %r12
	movq	%rbp, %rax
	adcq	%rdx, %r13
	mulq	%rsi				/* h4 += 2*f[1]*f[3] */
	addq	%rax, %r14
	movq	%rbp, %rax
	adcq	%rdx, %r15
	imulq	$19, %rsi, %rbp			/* rbp = 19*f[3] */
	mulq	%rdi				/* h0 += 2*f[1]*19*f[4] */
	addq	%rax, %rbx
	leaq	(%rsi,%rsi,1), %rax		/* rax = 2*f[3]; lea keeps CF */
	adcq	%rdx, %rcx

	/* Row f[3]. */
	mulq	%rdi				/* h2 += 2*f[3]*19*f[4] */
	addq	%rax, %r10
	movq	%rsi, %rax
	adcq	%rdx, %r11
	mulq	%rbp				/* h1 += f[3]*19*f[3] */
	addq	%rax, %r8
	movq	8*0(%rsp), %rax
	adcq	%rdx, %r9

	/* Row f[2]. */
	leaq	(%rax,%rax,1), %rsi		/* rsi = 2*f[2] */
	mulq	%rax				/* h4 += f[2]^2 */
	addq	%rax, %r14
	movq	%rbp, %rax
	adcq	%rdx, %r15
	mulq	%rsi				/* h0 += 19*f[3]*2*f[2] */
	addq	%rax, %rbx
	movq	%rsi, %rax
	adcq	%rdx, %rcx
	mulq	%rdi				/* h1 += 2*f[2]*19*f[4] */
	addq	%rax, %r8
	adcq	%rdx, %r9

	movq	8*4(%rsp), %rdi			/* rdi = h again for the store */
	jmp	.Lreduce51

/*
 * Shared tail: split each 128-bit accumulator at bit 51, push the upper
 * part into the next limb, wrap the carry out of h4 back into limb 0
 * multiplied by 19, store five limbs through %rdi, then unwind the
 * 96-byte frame and return.
 */
.align	32
.Lreduce51:
	movq	$0x7ffffffffffff, %rbp		/* 2^51 - 1 */

	/* h3 += h2 >> 51; rdx = h2 mod 2^51 */
	movq	%r10, %rdx
	shrq	$51, %r10
	shlq	$13, %r11
	andq	%rbp, %rdx
	orq	%r10, %r11
	addq	%r11, %r12
	adcq	$0, %r13

	/* h1 += h0 >> 51; rax = h0 mod 2^51 */
	movq	%rbx, %rax
	shrq	$51, %rbx
	shlq	$13, %rcx
	andq	%rbp, %rax
	orq	%rbx, %rcx
	addq	%rcx, %r8
	adcq	$0, %r9

	/* h4 += h3 >> 51; rbx = h3 mod 2^51 */
	movq	%r12, %rbx
	shrq	$51, %r12
	shlq	$13, %r13
	andq	%rbp, %rbx
	orq	%r12, %r13
	addq	%r13, %r14
	adcq	$0, %r15

	/* limb 2 += h1 >> 51; rcx = h1 mod 2^51 */
	movq	%r8, %rcx
	shrq	$51, %r8
	shlq	$13, %r9
	andq	%rbp, %rcx
	orq	%r8, %r9
	addq	%r9, %rdx

	/* r10 = h4 mod 2^51; r15 = h4 >> 51 */
	movq	%r14, %r10
	shrq	$51, %r14
	shlq	$13, %r15
	andq	%rbp, %r10
	orq	%r14, %r15

	/* limb 0 += 19 * (h4 >> 51), built as c + 2*(9*c) */
	leaq	(%r15,%r15,8), %r14
	leaq	(%r15,%r14,2), %r15
	addq	%r15, %rax

	/* one more carry out of limb 2 into limb 3 */
	movq	%rdx, %r8
	andq	%rbp, %rdx
	shrq	$51, %r8
	addq	%r8, %rbx

	/* and out of limb 0 into limb 1 */
	movq	%rax, %r9
	andq	%rbp, %rax
	shrq	$51, %r9
	addq	%r9, %rcx

	movq	%rax, 8*0(%rdi)
	movq	%rcx, 8*1(%rdi)
	movq	%rdx, 8*2(%rdi)
	movq	%rbx, 8*3(%rdi)
	movq	%r10, 8*4(%rdi)

	/* Reload callee-saved registers from above the 40-byte scratch area. */
	movq	8*5(%rsp), %r15
	.cfi_restore	%r15
	movq	8*6(%rsp), %r14
	.cfi_restore	%r14
	movq	8*7(%rsp), %r13
	.cfi_restore	%r13
	movq	8*8(%rsp), %r12
	.cfi_restore	%r12
	movq	8*9(%rsp), %rbx
	.cfi_restore	%rbx
	movq	8*10(%rsp), %rbp
	.cfi_restore	%rbp
	leaq	8*11(%rsp), %rsp
	/*
	 * The stack just shrank by 88 bytes, so the CFA offset must drop
	 * from 96 back to 8.  The adjustment is therefore negative; a
	 * positive 88 would describe the CFA as 184 bytes above %rsp at
	 * the return instruction.
	 */
	.cfi_adjust_cfa_offset	-88
.Lfe51_sqr_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
	.cfi_endproc
.size	x25519_fe51_sqr, .-x25519_fe51_sqr

/*
 * x25519_fe51_mul121666 -- h = 121666 * f on five-limb field elements.
 *
 * In:    %rdi = h, five 64-bit words (written by .Lreduce51)
 *        %rsi = f, five 64-bit words (read)
 * Out:   none in registers; control leaves through .Lreduce51.
 *
 * Each limb is multiplied by the constant into its own 128-bit
 * accumulator (h0 = rcx:rbx, h1 = r9:r8, h2 = r11:r10, h3 = r13:r12,
 * h4 = r15:r14).  %rdi is never modified here, so nothing is parked on
 * the stack; the 40-byte area is reserved only so the frame has the
 * 96-byte shape that the epilogue at .Lreduce51 tears down.
 */
.globl	x25519_fe51_mul121666
.type	x25519_fe51_mul121666, @function
.align	32
x25519_fe51_mul121666:
	.cfi_startproc
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp, -16
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx, -24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12, -32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13, -40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14, -48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15, -56
	leaq	-8*5(%rsp), %rsp
	.cfi_adjust_cfa_offset	40
.Lfe51_mul121666_body:
	/* mulq overwrites %rax, so the constant is reloaded before each use. */
	movl	$121666, %eax

	mulq	8*0(%rsi)			/* h0 = 121666*f[0] */
	movq	%rax, %rbx
	movl	$121666, %eax
	movq	%rdx, %rcx
	mulq	8*1(%rsi)			/* h1 = 121666*f[1] */
	movq	%rax, %r8
	movl	$121666, %eax
	movq	%rdx, %r9
	mulq	8*2(%rsi)			/* h2 = 121666*f[2] */
	movq	%rax, %r10
	movl	$121666, %eax
	movq	%rdx, %r11
	mulq	8*3(%rsi)			/* h3 = 121666*f[3] */
	movq	%rax, %r12
	movl	$121666, %eax
	movq	%rdx, %r13
	mulq	8*4(%rsi)			/* h4 = 121666*f[4] */
	movq	%rax, %r14
	movq	%rdx, %r15

	jmp	.Lreduce51			/* shared carry/store/epilogue */
.Lfe51_mul121666_epilogue:
	.cfi_endproc
.size	x25519_fe51_mul121666, .-x25519_fe51_mul121666

/*
 * x25519_fe64_eligible -- report whether two capability bits are set.
 *
 * In:    no arguments.
 * Out:   %eax = 0x80100 when bits 8 and 19 of the 32-bit word at
 *        OPENSSL_ia32cap_P+8 are both set, otherwise 0.
 * Clobb: %ecx, flags.
 *
 * NOTE(review): bits 8 and 19 presumably correspond to BMI2 and ADX,
 * which would match the mulx/adcx/adox instructions used by the fe64
 * routines in this file -- confirm against the definition of
 * OPENSSL_ia32cap_P.
 */
.globl	x25519_fe64_eligible
.type	x25519_fe64_eligible, @function
.align	32
x25519_fe64_eligible:
	xorl	%eax, %eax			/* default answer: not eligible */
	movl	OPENSSL_ia32cap_P+8(%rip), %ecx
	andl	$0x80100, %ecx			/* keep only the two bits of interest */
	cmpl	$0x80100, %ecx			/* ZF=1 only if both survived */
	cmovel	%ecx, %eax			/* branch-free select of the result */
	.byte	0xf3,0xc3			/* rep ret */
.size	x25519_fe64_eligible, .-x25519_fe64_eligible

/*
 * x25519_fe64_mul -- h = f * g on four-limb (4 x 64-bit) field elements.
 *
 * In:    %rdi = h, four 64-bit words (written by .Lreduce64)
 *        %rsi = f, four 64-bit words (read)
 *        %rdx = g, four 64-bit words (read)
 * Out:   none in registers; control leaves through the shared tail
 *        .Lreduce64 with
 *            r8..r15 = 512-bit product, least significant word first
 *            %rdx    = 38
 *            %rdi    = 0
 *
 * Uses mulx/adcx/adox.  Every row keeps two independent carry chains
 * alive (CF via adcx, OF via adox); which chain carries the low halves
 * alternates from row to row.
 *
 * Frame (16 bytes below seven pushed registers):
 *        8*0(%rsp) = g[2]  (r14 is recycled as a product word in row 2)
 *        8*2(%rsp) = caller's %rdi (h), reloaded by .Lreduce64
 */
.globl	x25519_fe64_mul
.type	x25519_fe64_mul, @function
.align	32
x25519_fe64_mul:
	.cfi_startproc
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp, -16
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx, -24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12, -32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13, -40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14, -48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15, -56
	pushq	%rdi
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rdi, -64
	leaq	-8*2(%rsp), %rsp
	.cfi_adjust_cfa_offset	16
.Lfe64_mul_body:

	/* mulx takes one factor implicitly from %rdx, so g moves to %rax. */
	movq	%rdx, %rax
	movq	8*0(%rdx), %rbp			/* rbp = g[0] */
	movq	8*0(%rsi), %rdx			/* rdx = f[0] */
	movq	8*1(%rax), %rcx			/* rcx = g[1] */
	movq	8*2(%rax), %r14			/* r14 = g[2] */
	movq	8*3(%rax), %r15			/* r15 = g[3] */

	/* Row 0: f[0] * g -> r8..r12 */
	mulxq	%rbp, %r8, %rax
	xorl	%edi, %edi			/* rdi = 0; also clears CF and OF */
	mulxq	%rcx, %r9, %rbx
	adcxq	%rax, %r9
	mulxq	%r14, %r10, %rax
	adcxq	%rbx, %r10
	mulxq	%r15, %r11, %r12
	movq	8*1(%rsi), %rdx			/* rdx = f[1] */
	adcxq	%rax, %r11
	movq	%r14, 8*0(%rsp)			/* spill g[2] for row 3 */
	adcxq	%rdi, %r12

	/* Row 1: f[1] * g added into r9..r13 */
	mulxq	%rbp, %rax, %rbx
	adoxq	%rax, %r9
	adcxq	%rbx, %r10
	mulxq	%rcx, %rax, %rbx
	adoxq	%rax, %r10
	adcxq	%rbx, %r11
	mulxq	%r14, %rax, %rbx
	adoxq	%rax, %r11
	adcxq	%rbx, %r12
	mulxq	%r15, %rax, %r13
	movq	8*2(%rsi), %rdx			/* rdx = f[2] */
	adoxq	%rax, %r12
	adcxq	%rdi, %r13
	adoxq	%rdi, %r13

	/* Row 2: f[2] * g added into r10..r14 */
	mulxq	%rbp, %rax, %rbx
	adcxq	%rax, %r10
	adoxq	%rbx, %r11
	mulxq	%rcx, %rax, %rbx
	adcxq	%rax, %r11
	adoxq	%rbx, %r12
	mulxq	%r14, %rax, %rbx
	adcxq	%rax, %r12
	adoxq	%rbx, %r13
	mulxq	%r15, %rax, %r14		/* r14 stops being g[2] here */
	movq	8*3(%rsi), %rdx			/* rdx = f[3] */
	adcxq	%rax, %r13
	adoxq	%rdi, %r14
	adcxq	%rdi, %r14

	/* Row 3: f[3] * g added into r11..r15 */
	mulxq	%rbp, %rax, %rbx
	adoxq	%rax, %r11
	adcxq	%rbx, %r12
	mulxq	%rcx, %rax, %rbx
	adoxq	%rax, %r12
	adcxq	%rbx, %r13
	mulxq	8*0(%rsp), %rax, %rbx		/* g[2] from its stack slot */
	adoxq	%rax, %r13
	adcxq	%rbx, %r14
	mulxq	%r15, %rax, %r15
	movl	$38, %edx			/* multiplier expected by .Lreduce64 */
	adoxq	%rax, %r14
	adcxq	%rdi, %r15
	adoxq	%rdi, %r15

	jmp	.Lreduce64			/* shared fold/store/epilogue */
.Lfe64_mul_epilogue:
	.cfi_endproc
.size	x25519_fe64_mul, .-x25519_fe64_mul

/*
 * x25519_fe64_sqr -- h = f * f on four-limb (4 x 64-bit) field elements.
 *
 * In:    %rdi = h, four 64-bit words (written)
 *        %rsi = f, four 64-bit words (read)
 * Out:   none in registers.
 *
 * Uses mulx/adcx/adox.  The off-diagonal products f[i]*f[j], i < j, are
 * summed once into r9..r14, then doubled with "adcx r,r" on the CF
 * chain while the squares f[i]^2 are added on the OF chain.
 *
 * Frame (16 bytes below seven pushed registers):
 *        8*2(%rsp) = caller's %rdi (h); the 16 bytes below it are not
 *                    used by this entry point.
 *
 * .Lreduce64 is also the jump target of x25519_fe64_mul, which builds
 * an identical 80-byte frame.  It expects
 *        r8..r15 = 512-bit product, least significant word first
 *        %rdx    = 38,  %rdi = 0
 * and folds the upper 256 bits into the lower 256 bits multiplied by
 * 38, consistent with 2^256 = 38 modulo 2^255-19.
 */
.globl	x25519_fe64_sqr
.type	x25519_fe64_sqr, @function
.align	32
x25519_fe64_sqr:
	.cfi_startproc
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp, -16
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx, -24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12, -32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13, -40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14, -48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15, -56
	pushq	%rdi
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rdi, -64
	leaq	-8*2(%rsp), %rsp
	.cfi_adjust_cfa_offset	16
.Lfe64_sqr_body:

	movq	8*0(%rsi), %rdx			/* rdx = f[0] */
	movq	8*1(%rsi), %rcx			/* rcx = f[1] */
	movq	8*2(%rsi), %rbp			/* rbp = f[2] */
	movq	8*3(%rsi), %rsi			/* rsi = f[3]; pointer is dead */

	/* f[0]^2 and the f[0]*f[1..3] cross terms */
	mulxq	%rdx, %r8, %r15			/* r15:r8 = f[0]^2 */
	mulxq	%rcx, %r9, %rax
	xorl	%edi, %edi			/* rdi = 0; also clears CF and OF */
	mulxq	%rbp, %r10, %rbx
	adcxq	%rax, %r10
	mulxq	%rsi, %r11, %r12
	movq	%rcx, %rdx			/* rdx = f[1] */
	adcxq	%rbx, %r11
	adcxq	%rdi, %r12

	/* f[1]*f[2] and f[1]*f[3] */
	mulxq	%rbp, %rax, %rbx
	adoxq	%rax, %r11
	adcxq	%rbx, %r12
	mulxq	%rsi, %rax, %r13
	movq	%rbp, %rdx			/* rdx = f[2] */
	adoxq	%rax, %r12
	adcxq	%rdi, %r13

	/* f[2]*f[3] */
	mulxq	%rsi, %rax, %r14
	movq	%rcx, %rdx			/* rdx = f[1] */
	adoxq	%rax, %r13
	adcxq	%rdi, %r14
	adoxq	%rdi, %r14

	/* Double the cross terms (CF chain), add the squares (OF chain). */
	adcxq	%r9, %r9
	adoxq	%r15, %r9			/* high word of f[0]^2 */
	adcxq	%r10, %r10
	mulxq	%rdx, %rax, %rbx		/* f[1]^2 */
	movq	%rbp, %rdx			/* rdx = f[2] */
	adcxq	%r11, %r11
	adoxq	%rax, %r10
	adcxq	%r12, %r12
	adoxq	%rbx, %r11
	mulxq	%rdx, %rax, %rbx		/* f[2]^2 */
	movq	%rsi, %rdx			/* rdx = f[3] */
	adcxq	%r13, %r13
	adoxq	%rax, %r12
	adcxq	%r14, %r14
	adoxq	%rbx, %r13
	mulxq	%rdx, %rax, %r15		/* f[3]^2 */
	movl	$38, %edx			/* multiplier for the fold below */
	adoxq	%rax, %r14
	adcxq	%rdi, %r15
	adoxq	%rdi, %r15
	jmp	.Lreduce64

/*
 * Shared tail: r8..r11 += 38 * r12..r15, then fold the leftover carry
 * word (again times 38) and any final carry-out into the low word,
 * store four limbs through the saved h pointer, unwind the 80-byte
 * frame and return.
 */
.align	32
.Lreduce64:
	mulxq	%r12, %rax, %rbx
	adcxq	%rax, %r8
	adoxq	%rbx, %r9
	mulxq	%r13, %rax, %rbx
	adcxq	%rax, %r9
	adoxq	%rbx, %r10
	mulxq	%r14, %rax, %rbx
	adcxq	%rax, %r10
	adoxq	%rbx, %r11
	mulxq	%r15, %rax, %r12
	adcxq	%rax, %r11
	adoxq	%rdi, %r12			/* rdi is still 0: absorb OF ... */
	adcxq	%rdi, %r12			/* ... and CF into the top word */

	movq	8*2(%rsp), %rdi			/* rdi = h */
	imulq	%rdx, %r12			/* r12 = 38 * top word */

	addq	%r12, %r8
	adcq	$0, %r9
	adcq	$0, %r10
	adcq	$0, %r11

	sbbq	%rax, %rax			/* all-ones if that carried out */
	andq	$38, %rax

	addq	%rax, %r8
	movq	%r9, 8*1(%rdi)
	movq	%r10, 8*2(%rdi)
	movq	%r11, 8*3(%rdi)
	movq	%r8, 8*0(%rdi)

	/* Reload callee-saved registers from above scratch and saved %rdi. */
	movq	8*3(%rsp), %r15
	.cfi_restore	%r15
	movq	8*4(%rsp), %r14
	.cfi_restore	%r14
	movq	8*5(%rsp), %r13
	.cfi_restore	%r13
	movq	8*6(%rsp), %r12
	.cfi_restore	%r12
	movq	8*7(%rsp), %rbx
	.cfi_restore	%rbx
	movq	8*8(%rsp), %rbp
	.cfi_restore	%rbp
	leaq	8*9(%rsp), %rsp
	/*
	 * The frame was 80 bytes (return address, seven pushes, 16 bytes
	 * of scratch) and 72 have just been released, so the CFA offset
	 * must drop from 80 back to 8: the adjustment is -72.
	 */
	.cfi_adjust_cfa_offset	-72
.Lfe64_sqr_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
	.cfi_endproc
.size	x25519_fe64_sqr, .-x25519_fe64_sqr

/*
 * x25519_fe64_mul121666 -- h = 121666 * f on four-limb field elements.
 *
 * In:    %rdi = h, four 64-bit words (written)
 *        %rsi = f, four 64-bit words (read)
 * Out:   none in registers.
 * Clobb: %rax, %rcx, %rdx, %r8-%r11, flags.  No stack is used.
 *
 * The 320-bit product is built with mulx; its fifth word is multiplied
 * by 38 and added back into the low word, and a carry out of that
 * addition is folded in once more as 38.
 */
.globl	x25519_fe64_mul121666
.type	x25519_fe64_mul121666, @function
.align	32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
	movl	$121666, %edx			/* implicit mulx factor */
	mulxq	8*0(%rsi), %r8, %rcx
	mulxq	8*1(%rsi), %r9, %rax
	addq	%rcx, %r9
	mulxq	8*2(%rsi), %r10, %rcx
	adcq	%rax, %r10
	mulxq	8*3(%rsi), %r11, %rax
	adcq	%rcx, %r11
	adcq	$0, %rax			/* rax = fifth (top) word */

	imulq	$38, %rax, %rax			/* weight of the top word */

	addq	%rax, %r8
	adcq	$0, %r9
	adcq	$0, %r10
	adcq	$0, %r11

	sbbq	%rax, %rax			/* all-ones if that carried out */
	andq	$38, %rax

	addq	%rax, %r8
	movq	%r9, 8*1(%rdi)
	movq	%r10, 8*2(%rdi)
	movq	%r11, 8*3(%rdi)
	movq	%r8, 8*0(%rdi)

.Lfe64_mul121666_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	x25519_fe64_mul121666, .-x25519_fe64_mul121666

/*
 * x25519_fe64_add -- h = f + g on four-limb field elements.
 *
 * In:    %rdi = h, four 64-bit words (written)
 *        %rsi = f, four 64-bit words (read)
 *        %rdx = g, four 64-bit words (read)
 * Out:   none in registers.
 * Clobb: %rax, %r8-%r11, flags.  No stack is used.
 *
 * A carry out of the 256-bit addition is folded back in as 38; if that
 * correction itself carries out, 38 is added to the low word once more.
 * The result is not brought below the modulus here.
 */
.globl	x25519_fe64_add
.type	x25519_fe64_add, @function
.align	32
x25519_fe64_add:
.Lfe64_add_body:
	movq	8*0(%rsi), %r8
	movq	8*1(%rsi), %r9
	movq	8*2(%rsi), %r10
	movq	8*3(%rsi), %r11

	addq	8*0(%rdx), %r8
	adcq	8*1(%rdx), %r9
	adcq	8*2(%rdx), %r10
	adcq	8*3(%rdx), %r11

	sbbq	%rax, %rax			/* all-ones if the sum carried out */
	andq	$38, %rax

	/* The stores are interleaved with the chain; movq leaves CF alone. */
	addq	%rax, %r8
	adcq	$0, %r9
	adcq	$0, %r10
	movq	%r9, 8*1(%rdi)
	adcq	$0, %r11
	movq	%r10, 8*2(%rdi)
	sbbq	%rax, %rax			/* second, final carry-out */
	movq	%r11, 8*3(%rdi)
	andq	$38, %rax

	addq	%rax, %r8
	movq	%r8, 8*0(%rdi)

.Lfe64_add_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	x25519_fe64_add, .-x25519_fe64_add

/*
 * x25519_fe64_sub -- h = f - g on four-limb field elements.
 *
 * In:    %rdi = h, four 64-bit words (written)
 *        %rsi = f, four 64-bit words (read)
 *        %rdx = g, four 64-bit words (read)
 * Out:   none in registers.
 * Clobb: %rax, %r8-%r11, flags.  No stack is used.
 *
 * A borrow out of the 256-bit subtraction is compensated by
 * subtracting 38; if that correction borrows again, 38 is subtracted
 * from the low word once more.
 */
.globl	x25519_fe64_sub
.type	x25519_fe64_sub, @function
.align	32
x25519_fe64_sub:
.Lfe64_sub_body:
	movq	8*0(%rsi), %r8
	movq	8*1(%rsi), %r9
	movq	8*2(%rsi), %r10
	movq	8*3(%rsi), %r11

	subq	8*0(%rdx), %r8
	sbbq	8*1(%rdx), %r9
	sbbq	8*2(%rdx), %r10
	sbbq	8*3(%rdx), %r11

	sbbq	%rax, %rax			/* all-ones if the difference borrowed */
	andq	$38, %rax

	/* The stores are interleaved with the chain; movq leaves CF alone. */
	subq	%rax, %r8
	sbbq	$0, %r9
	sbbq	$0, %r10
	movq	%r9, 8*1(%rdi)
	sbbq	$0, %r11
	movq	%r10, 8*2(%rdi)
	sbbq	%rax, %rax			/* second, final borrow */
	movq	%r11, 8*3(%rdi)
	andq	$38, %rax

	subq	%rax, %r8
	movq	%r8, 8*0(%rdi)

.Lfe64_sub_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	x25519_fe64_sub, .-x25519_fe64_sub

/*
 * x25519_fe64_tobytes -- final reduction of a four-limb field element.
 *
 * In:    %rdi = destination, 32 bytes (written as four 64-bit words)
 *        %rsi = f, four 64-bit words (read)
 * Out:   none in registers.
 * Clobb: %rax, %r8-%r11, flags.  No stack is used.
 *
 * Step 1 clears bit 255 of the input and adds 19 (38 if bit 255 was
 * set).  Step 2 clears bit 255 of that sum and subtracts 19 again
 * unless the sum had bit 255 set.
 * NOTE(review): this looks like the usual way of producing the unique
 * representative below 2^255-19 -- confirm against the C reference.
 */
.globl	x25519_fe64_tobytes
.type	x25519_fe64_tobytes, @function
.align	32
x25519_fe64_tobytes:
.Lfe64_to_body:
	movq	8*0(%rsi), %r8
	movq	8*1(%rsi), %r9
	movq	8*2(%rsi), %r10
	movq	8*3(%rsi), %r11

	/* Step 1: rax = top limb without bit 63; r11 = 19 or 38. */
	leaq	(%r11,%r11,1), %rax		/* shift bit 63 out ... */
	sarq	$63, %r11			/* r11 = all-ones if bit 63 was set */
	shrq	$1, %rax			/* ... and shift the rest back */
	andq	$19, %r11
	addq	$19, %r11

	addq	%r11, %r8
	adcq	$0, %r9
	adcq	$0, %r10
	adcq	$0, %rax

	/* Step 2: r11 = top limb without bit 63; rax = 19 if it was clear. */
	leaq	(%rax,%rax,1), %r11
	sarq	$63, %rax			/* rax = all-ones if bit 63 is set */
	shrq	$1, %r11
	notq	%rax
	andq	$19, %rax

	subq	%rax, %r8
	sbbq	$0, %r9
	sbbq	$0, %r10
	sbbq	$0, %r11

	movq	%r8, 8*0(%rdi)
	movq	%r9, 8*1(%rdi)
	movq	%r10, 8*2(%rdi)
	movq	%r11, 8*3(%rdi)

.Lfe64_to_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.size	x25519_fe64_tobytes, .-x25519_fe64_tobytes
/* NUL-terminated identification string emitted after the code. */
.asciz	"X25519 primitives for x86_64, CRYPTOGAMS by <appro@openssl.org>"
