x86_64-mont5.S revision 238405
1238405Sjkim	# $FreeBSD: head/secure/lib/libcrypto/amd64/x86_64-mont5.S 238405 2012-07-12 19:30:53Z jkim $
2238405Sjkim.text
3238405Sjkim
# -----------------------------------------------------------------------
# bn_mul_mont_gather5 -- Montgomery multiplication in which the words of
# the multiplier are fetched from a scattered table (bn_scatter5 in this
# file writes that layout: word i of entry idx lives at table+idx*8+i*256).
#
# Inputs, as this code reads them (System V AMD64 argument order):
#   %rdi   destination word array; the final result is stored here
#   %rsi   multiplicand word array
#   %rdx   base address of the scattered table holding the multiplier
#   %rcx   word array subtracted in the final step (presumably the
#          modulus -- confirm against the C prototype)
#   %r8    pointer to a single 64-bit word that is loaded once and used to
#          derive the per-pass reduction multiplier (presumably n0)
#   %r9d   word count (zero-extended to 64 bits below)
#   stack  32-bit table index at 8(%rsp) on entry; only bits 0..4 of it
#          influence the addresses and masks computed below
# Output: %rax = 1.
#
# When the word count is a multiple of 4 and at least 8, control is handed
# to .Lmul4x_enter (4-way unrolled variant); otherwise the word-at-a-time
# code below runs.
# -----------------------------------------------------------------------
4238405Sjkim.globl	bn_mul_mont_gather5
5238405Sjkim.type	bn_mul_mont_gather5,@function
6238405Sjkim.align	64
7238405Sjkimbn_mul_mont_gather5:
# Dispatch: (count & 3) != 0 or count < 8 -> generic path, else 4x path.
8238405Sjkim	testl	$3,%r9d
9238405Sjkim	jnz	.Lmul_enter
10238405Sjkim	cmpl	$8,%r9d
11238405Sjkim	jb	.Lmul_enter
12238405Sjkim	jmp	.Lmul4x_enter
13238405Sjkim
14238405Sjkim.align	16
15238405Sjkim.Lmul_enter:
# The 32-bit self-move clears bits 63..32 of %r9; %r10d = table index.
16238405Sjkim	movl	%r9d,%r9d
17238405Sjkim	movl	8(%rsp),%r10d
# Save all six callee-saved GPRs, then reserve (count+2) qwords of scratch
# below them and round %rsp down to a 1024-byte boundary.
18238405Sjkim	pushq	%rbx
19238405Sjkim	pushq	%rbp
20238405Sjkim	pushq	%r12
21238405Sjkim	pushq	%r13
22238405Sjkim	pushq	%r14
23238405Sjkim	pushq	%r15
24238405Sjkim	movq	%rsp,%rax
25238405Sjkim	leaq	2(%r9),%r11
26238405Sjkim	negq	%r11
27238405Sjkim	leaq	(%rsp,%r11,8),%rsp
28238405Sjkim	andq	$-1024,%rsp
29238405Sjkim
# scratch[count+1] keeps the pre-allocation %rsp for the epilogue.
30238405Sjkim	movq	%rax,8(%rsp,%r9,8)
31238405Sjkim.Lmul_body:
# Gather setup.
#   %r11 = idx & 7            -> column offset inside a 64-byte line
#   %r10 = ~(idx >> 3) & 3    -> starting qword k in .Lmagic_masks
#   %r12 = table + 96 + (idx & 7) * 8
# %xmm4..%xmm7 receive mask qwords k..k+3. Only qword 3 of the mask table
# is all-ones, so exactly one of the four registers is all-ones and it
# picks the candidate at offset ((idx >> 3) & 3) * 64 from the column.
32238405Sjkim	movq	%rdx,%r12
33238405Sjkim	movq	%r10,%r11
34238405Sjkim	shrq	$3,%r10
35238405Sjkim	andq	$7,%r11
36238405Sjkim	notq	%r10
37238405Sjkim	leaq	.Lmagic_masks(%rip),%rax
38238405Sjkim	andq	$3,%r10
39238405Sjkim	leaq	96(%r12,%r11,8),%r12
40238405Sjkim	movq	0(%rax,%r10,8),%xmm4
41238405Sjkim	movq	8(%rax,%r10,8),%xmm5
42238405Sjkim	movq	16(%rax,%r10,8),%xmm6
43238405Sjkim	movq	24(%rax,%r10,8),%xmm7
44238405Sjkim
# Gather multiplier word 0: all four candidates (-96, -32, +32, +96 from
# %r12) are always loaded and then masked and OR-ed together, so the set
# of addresses touched does not depend on bits 3..4 of the index.
# %r12 then advances by 256 bytes to the next multiplier word.
45238405Sjkim	movq	-96(%r12),%xmm0
46238405Sjkim	movq	-32(%r12),%xmm1
47238405Sjkim	pand	%xmm4,%xmm0
48238405Sjkim	movq	32(%r12),%xmm2
49238405Sjkim	pand	%xmm5,%xmm1
50238405Sjkim	movq	96(%r12),%xmm3
51238405Sjkim	pand	%xmm6,%xmm2
52238405Sjkim	por	%xmm1,%xmm0
53238405Sjkim	pand	%xmm7,%xmm3
54238405Sjkim	por	%xmm2,%xmm0
55238405Sjkim	leaq	256(%r12),%r12
56238405Sjkim	por	%xmm3,%xmm0
57238405Sjkim
# Hand-encoded "movq %xmm0,%rbx" (66 48 0F 7E C3): %rbx = multiplier word.
58238405Sjkim.byte	102,72,15,126,195
59238405Sjkim
# %r8 = the word %r8 pointed at; %rax = multiplicand word 0.
60238405Sjkim	movq	(%r8),%r8
61238405Sjkim	movq	(%rsi),%rax
62238405Sjkim
# %r14 = outer pass counter, %r15 = inner word index.
63238405Sjkim	xorq	%r14,%r14
64238405Sjkim	xorq	%r15,%r15
65238405Sjkim
# The SSE instructions interleaved with the integer code from here on
# gather the NEXT multiplier word into %xmm0 while the current pass runs.
66238405Sjkim	movq	-96(%r12),%xmm0
67238405Sjkim	movq	-32(%r12),%xmm1
68238405Sjkim	pand	%xmm4,%xmm0
69238405Sjkim	movq	32(%r12),%xmm2
70238405Sjkim	pand	%xmm5,%xmm1
71238405Sjkim
# %rdx:%rax = a[0] * %rbx; %r10 keeps the low half.
72238405Sjkim	movq	%r8,%rbp
73238405Sjkim	mulq	%rbx
74238405Sjkim	movq	%rax,%r10
75238405Sjkim	movq	(%rcx),%rax
76238405Sjkim
77238405Sjkim	movq	96(%r12),%xmm3
78238405Sjkim	pand	%xmm6,%xmm2
79238405Sjkim	por	%xmm1,%xmm0
80238405Sjkim	pand	%xmm7,%xmm3
81238405Sjkim
# %rbp = low 64 bits of (%r8 * %r10): the reduction multiplier for this
# pass. %r11 = high half of a[0] * %rbx.
82238405Sjkim	imulq	%r10,%rbp
83238405Sjkim	movq	%rdx,%r11
84238405Sjkim
85238405Sjkim	por	%xmm2,%xmm0
86238405Sjkim	leaq	256(%r12),%r12
87238405Sjkim	por	%xmm3,%xmm0
88238405Sjkim
# Add word 0 of (%rcx)[] * %rbp to %r10. The low word of that sum is not
# stored anywhere; only the carry-out (high half plus CF) is kept in %r13.
89238405Sjkim	mulq	%rbp
90238405Sjkim	addq	%rax,%r10
91238405Sjkim	movq	8(%rsi),%rax
92238405Sjkim	adcq	$0,%rdx
93238405Sjkim	movq	%rdx,%r13
94238405Sjkim
95238405Sjkim	leaq	1(%r15),%r15
96238405Sjkim	jmp	.L1st_enter
97238405Sjkim
98238405Sjkim.align	16
# First pass, one word per iteration. %r13 accumulates the (%rcx)[]*%rbp
# column, %r11/%r10 carry the (%rsi)[]*%rbx column; the result for word j
# is stored one slot lower, at scratch[j-1].
99238405Sjkim.L1st:
100238405Sjkim	addq	%rax,%r13
101238405Sjkim	movq	(%rsi,%r15,8),%rax
102238405Sjkim	adcq	$0,%rdx
103238405Sjkim	addq	%r11,%r13
104238405Sjkim	movq	%r10,%r11
105238405Sjkim	adcq	$0,%rdx
106238405Sjkim	movq	%r13,-16(%rsp,%r15,8)
107238405Sjkim	movq	%rdx,%r13
108238405Sjkim
109238405Sjkim.L1st_enter:
110238405Sjkim	mulq	%rbx
111238405Sjkim	addq	%rax,%r11
112238405Sjkim	movq	(%rcx,%r15,8),%rax
113238405Sjkim	adcq	$0,%rdx
114238405Sjkim	leaq	1(%r15),%r15
115238405Sjkim	movq	%rdx,%r10
116238405Sjkim
117238405Sjkim	mulq	%rbp
118238405Sjkim	cmpq	%r9,%r15
119238405Sjkim	jne	.L1st
120238405Sjkim
# Hand-encoded "movq %xmm0,%rbx": %rbx = multiplier word for the next pass.
121238405Sjkim.byte	102,72,15,126,195
122238405Sjkim
# Finish the last word of the first pass and reload a[0] for the next one.
123238405Sjkim	addq	%rax,%r13
124238405Sjkim	movq	(%rsi),%rax
125238405Sjkim	adcq	$0,%rdx
126238405Sjkim	addq	%r11,%r13
127238405Sjkim	adcq	$0,%rdx
128238405Sjkim	movq	%r13,-16(%rsp,%r15,8)
129238405Sjkim	movq	%rdx,%r13
130238405Sjkim	movq	%r10,%r11
131238405Sjkim
# scratch[count-1] = top word, scratch[count] = carry out of the pass.
132238405Sjkim	xorq	%rdx,%rdx
133238405Sjkim	addq	%r11,%r13
134238405Sjkim	adcq	$0,%rdx
135238405Sjkim	movq	%r13,-8(%rsp,%r9,8)
136238405Sjkim	movq	%rdx,(%rsp,%r9,8)
137238405Sjkim
138238405Sjkim	leaq	1(%r14),%r14
139238405Sjkim	jmp	.Louter
140238405Sjkim.align	16
# Remaining passes (%r14 = 1 .. count-1). Same structure as the first pass
# except that the previous contents of the scratch vector are added in.
141238405Sjkim.Louter:
142238405Sjkim	xorq	%r15,%r15
143238405Sjkim	movq	%r8,%rbp
144238405Sjkim	movq	(%rsp),%r10
145238405Sjkim
146238405Sjkim	movq	-96(%r12),%xmm0
147238405Sjkim	movq	-32(%r12),%xmm1
148238405Sjkim	pand	%xmm4,%xmm0
149238405Sjkim	movq	32(%r12),%xmm2
150238405Sjkim	pand	%xmm5,%xmm1
151238405Sjkim
# %r10 = scratch[0] + low(a[0] * %rbx); carry folded into %rdx.
152238405Sjkim	mulq	%rbx
153238405Sjkim	addq	%rax,%r10
154238405Sjkim	movq	(%rcx),%rax
155238405Sjkim	adcq	$0,%rdx
156238405Sjkim
157238405Sjkim	movq	96(%r12),%xmm3
158238405Sjkim	pand	%xmm6,%xmm2
159238405Sjkim	por	%xmm1,%xmm0
160238405Sjkim	pand	%xmm7,%xmm3
161238405Sjkim
# %rbp = low 64 bits of (%r8 * %r10): reduction multiplier for this pass.
162238405Sjkim	imulq	%r10,%rbp
163238405Sjkim	movq	%rdx,%r11
164238405Sjkim
165238405Sjkim	por	%xmm2,%xmm0
166238405Sjkim	leaq	256(%r12),%r12
167238405Sjkim	por	%xmm3,%xmm0
168238405Sjkim
# As in the first pass the low word of this sum is dropped; %r10 is then
# reloaded with scratch[1] for the inner loop.
169238405Sjkim	mulq	%rbp
170238405Sjkim	addq	%rax,%r10
171238405Sjkim	movq	8(%rsi),%rax
172238405Sjkim	adcq	$0,%rdx
173238405Sjkim	movq	8(%rsp),%r10
174238405Sjkim	movq	%rdx,%r13
175238405Sjkim
176238405Sjkim	leaq	1(%r15),%r15
177238405Sjkim	jmp	.Linner_enter
178238405Sjkim
179238405Sjkim.align	16
# Inner loop: %r10 holds the previous scratch word being folded in, and the
# updated word j is written back one slot lower (scratch[j-1]).
180238405Sjkim.Linner:
181238405Sjkim	addq	%rax,%r13
182238405Sjkim	movq	(%rsi,%r15,8),%rax
183238405Sjkim	adcq	$0,%rdx
184238405Sjkim	addq	%r10,%r13
185238405Sjkim	movq	(%rsp,%r15,8),%r10
186238405Sjkim	adcq	$0,%rdx
187238405Sjkim	movq	%r13,-16(%rsp,%r15,8)
188238405Sjkim	movq	%rdx,%r13
189238405Sjkim
190238405Sjkim.Linner_enter:
191238405Sjkim	mulq	%rbx
192238405Sjkim	addq	%rax,%r11
193238405Sjkim	movq	(%rcx,%r15,8),%rax
194238405Sjkim	adcq	$0,%rdx
195238405Sjkim	addq	%r11,%r10
196238405Sjkim	movq	%rdx,%r11
197238405Sjkim	adcq	$0,%r11
198238405Sjkim	leaq	1(%r15),%r15
199238405Sjkim
200238405Sjkim	mulq	%rbp
201238405Sjkim	cmpq	%r9,%r15
202238405Sjkim	jne	.Linner
203238405Sjkim
# Hand-encoded "movq %xmm0,%rbx": %rbx = multiplier word for the next pass.
204238405Sjkim.byte	102,72,15,126,195
205238405Sjkim
206238405Sjkim	addq	%rax,%r13
207238405Sjkim	movq	(%rsi),%rax
208238405Sjkim	adcq	$0,%rdx
209238405Sjkim	addq	%r10,%r13
210238405Sjkim	movq	(%rsp,%r15,8),%r10
211238405Sjkim	adcq	$0,%rdx
212238405Sjkim	movq	%r13,-16(%rsp,%r15,8)
213238405Sjkim	movq	%rdx,%r13
214238405Sjkim
# Top word: add the pending high half (%r11) and the previous carry word
# (%r10 = old scratch[count]); store the new top word and new carry.
215238405Sjkim	xorq	%rdx,%rdx
216238405Sjkim	addq	%r11,%r13
217238405Sjkim	adcq	$0,%rdx
218238405Sjkim	addq	%r10,%r13
219238405Sjkim	adcq	$0,%rdx
220238405Sjkim	movq	%r13,-8(%rsp,%r9,8)
221238405Sjkim	movq	%rdx,(%rsp,%r9,8)
222238405Sjkim
223238405Sjkim	leaq	1(%r14),%r14
224238405Sjkim	cmpq	%r9,%r14
225238405Sjkim	jl	.Louter
226238405Sjkim
# Final subtraction: (%rdi)[] = scratch[] - (%rcx)[] with borrow
# propagation. The xorq also clears CF, so the first sbbq starts with no
# borrow; movq/leaq leave flags alone and decq does not modify CF, so the
# borrow survives from one iteration to the next.
227238405Sjkim	xorq	%r14,%r14
228238405Sjkim	movq	(%rsp),%rax
229238405Sjkim	leaq	(%rsp),%rsi
230238405Sjkim	movq	%r9,%r15
231238405Sjkim	jmp	.Lsub
232238405Sjkim.align	16
233238405Sjkim.Lsub:	sbbq	(%rcx,%r14,8),%rax
234238405Sjkim	movq	%rax,(%rdi,%r14,8)
235238405Sjkim	movq	8(%rsi,%r14,8),%rax
236238405Sjkim	leaq	1(%r14),%r14
237238405Sjkim	decq	%r15
238238405Sjkim	jnz	.Lsub
239238405Sjkim
# %rax = scratch[count] minus the final borrow, then used as a bit mask
# (expected to be 0 or all-ones -- TODO confirm the carry word is 0/1):
#   %rsi = (scratch & mask) | (%rdi & ~mask)
# i.e. a branch-free choice of which buffer the copy loop reads from.
240238405Sjkim	sbbq	$0,%rax
241238405Sjkim	xorq	%r14,%r14
242238405Sjkim	andq	%rax,%rsi
243238405Sjkim	notq	%rax
244238405Sjkim	movq	%rdi,%rcx
245238405Sjkim	andq	%rax,%rcx
246238405Sjkim	movq	%r9,%r15
247238405Sjkim	orq	%rcx,%rsi
248238405Sjkim.align	16
# Copy the selected words to the destination and, in the same loop,
# overwrite each scratch word with the loop index so no intermediate value
# is left on the stack.
249238405Sjkim.Lcopy:
250238405Sjkim	movq	(%rsi,%r14,8),%rax
251238405Sjkim	movq	%r14,(%rsp,%r14,8)
252238405Sjkim	movq	%rax,(%rdi,%r14,8)
253238405Sjkim	leaq	1(%r14),%r14
254238405Sjkim	subq	$1,%r15
255238405Sjkim	jnz	.Lcopy
256238405Sjkim
# Epilogue: fetch the saved %rsp from scratch[count+1], return 1, restore
# the six callee-saved registers and release the frame.
257238405Sjkim	movq	8(%rsp,%r9,8),%rsi
258238405Sjkim	movq	$1,%rax
259238405Sjkim	movq	(%rsi),%r15
260238405Sjkim	movq	8(%rsi),%r14
261238405Sjkim	movq	16(%rsi),%r13
262238405Sjkim	movq	24(%rsi),%r12
263238405Sjkim	movq	32(%rsi),%rbp
264238405Sjkim	movq	40(%rsi),%rbx
265238405Sjkim	leaq	48(%rsi),%rsp
266238405Sjkim.Lmul_epilogue:
# Bytes F3 C3 encode "rep ret".
267238405Sjkim	.byte	0xf3,0xc3
268238405Sjkim.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
# -----------------------------------------------------------------------
# bn_mul4x_mont_gather5 -- 4-way unrolled variant of bn_mul_mont_gather5.
#
# The symbol has no .globl in this file; within the visible source it is
# reached only through the jump to .Lmul4x_enter in bn_mul_mont_gather5,
# which is taken when the word count is a multiple of 4 and at least 8.
# Because it is entered by jmp, registers and stack are exactly as the
# original caller left them:
#   %rdi   destination word array
#   %rsi   multiplicand word array
#   %rdx   base of the scattered multiplier table
#   %rcx   word array subtracted in the final step (presumably the modulus)
#   %r8    pointer to the 64-bit word used to derive the reduction
#          multiplier (presumably n0)
#   %r9d   word count
#   stack  32-bit table index at 8(%rsp)
# Output: %rax = 1.
#
# Scratch frame: (count+4) qwords, 1024-byte aligned.
#   scratch[0..count]  running result plus carry word
#   scratch[count+1]   saved %rsp
#   scratch[count+2]   saved %rdi (the register doubles as an accumulator)
# -----------------------------------------------------------------------
269238405Sjkim.type	bn_mul4x_mont_gather5,@function
270238405Sjkim.align	16
271238405Sjkimbn_mul4x_mont_gather5:
272238405Sjkim.Lmul4x_enter:
# Zero-extend the count, fetch the table index, save callee-saved GPRs and
# allocate the aligned scratch frame.
273238405Sjkim	movl	%r9d,%r9d
274238405Sjkim	movl	8(%rsp),%r10d
275238405Sjkim	pushq	%rbx
276238405Sjkim	pushq	%rbp
277238405Sjkim	pushq	%r12
278238405Sjkim	pushq	%r13
279238405Sjkim	pushq	%r14
280238405Sjkim	pushq	%r15
281238405Sjkim	movq	%rsp,%rax
282238405Sjkim	leaq	4(%r9),%r11
283238405Sjkim	negq	%r11
284238405Sjkim	leaq	(%rsp,%r11,8),%rsp
285238405Sjkim	andq	$-1024,%rsp
286238405Sjkim
287238405Sjkim	movq	%rax,8(%rsp,%r9,8)
288238405Sjkim.Lmul4x_body:
# Park the destination pointer; %rdi is used as an accumulator below.
289238405Sjkim	movq	%rdi,16(%rsp,%r9,8)
# Gather setup.
#   %r11 = idx & 7            -> column offset inside a 64-byte line
#   %r10 = ~(idx >> 3) & 3    -> starting qword k in .Lmagic_masks
#   %r12 = table + 96 + (idx & 7) * 8
# %xmm4..%xmm7 = mask qwords k..k+3; exactly one of them is all-ones and
# selects the candidate at offset ((idx >> 3) & 3) * 64 from the column.
290238405Sjkim	movq	%rdx,%r12
291238405Sjkim	movq	%r10,%r11
292238405Sjkim	shrq	$3,%r10
293238405Sjkim	andq	$7,%r11
294238405Sjkim	notq	%r10
295238405Sjkim	leaq	.Lmagic_masks(%rip),%rax
296238405Sjkim	andq	$3,%r10
297238405Sjkim	leaq	96(%r12,%r11,8),%r12
298238405Sjkim	movq	0(%rax,%r10,8),%xmm4
299238405Sjkim	movq	8(%rax,%r10,8),%xmm5
300238405Sjkim	movq	16(%rax,%r10,8),%xmm6
301238405Sjkim	movq	24(%rax,%r10,8),%xmm7
302238405Sjkim
# Gather multiplier word 0: all four candidates are loaded, masked and
# OR-ed; %r12 then steps 256 bytes to the next multiplier word.
303238405Sjkim	movq	-96(%r12),%xmm0
304238405Sjkim	movq	-32(%r12),%xmm1
305238405Sjkim	pand	%xmm4,%xmm0
306238405Sjkim	movq	32(%r12),%xmm2
307238405Sjkim	pand	%xmm5,%xmm1
308238405Sjkim	movq	96(%r12),%xmm3
309238405Sjkim	pand	%xmm6,%xmm2
310238405Sjkim	por	%xmm1,%xmm0
311238405Sjkim	pand	%xmm7,%xmm3
312238405Sjkim	por	%xmm2,%xmm0
313238405Sjkim	leaq	256(%r12),%r12
314238405Sjkim	por	%xmm3,%xmm0
315238405Sjkim
# Hand-encoded "movq %xmm0,%rbx" (66 48 0F 7E C3): %rbx = multiplier word.
316238405Sjkim.byte	102,72,15,126,195
317238405Sjkim	movq	(%r8),%r8
318238405Sjkim	movq	(%rsi),%rax
319238405Sjkim
# %r14 = outer pass counter, %r15 = inner word index.
320238405Sjkim	xorq	%r14,%r14
321238405Sjkim	xorq	%r15,%r15
322238405Sjkim
# Start gathering the next multiplier word while the first pass runs.
323238405Sjkim	movq	-96(%r12),%xmm0
324238405Sjkim	movq	-32(%r12),%xmm1
325238405Sjkim	pand	%xmm4,%xmm0
326238405Sjkim	movq	32(%r12),%xmm2
327238405Sjkim	pand	%xmm5,%xmm1
328238405Sjkim
329238405Sjkim	movq	%r8,%rbp
330238405Sjkim	mulq	%rbx
331238405Sjkim	movq	%rax,%r10
332238405Sjkim	movq	(%rcx),%rax
333238405Sjkim
334238405Sjkim	movq	96(%r12),%xmm3
335238405Sjkim	pand	%xmm6,%xmm2
336238405Sjkim	por	%xmm1,%xmm0
337238405Sjkim	pand	%xmm7,%xmm3
338238405Sjkim
# %rbp = low 64 bits of (%r8 * %r10): reduction multiplier for this pass.
339238405Sjkim	imulq	%r10,%rbp
340238405Sjkim	movq	%rdx,%r11
341238405Sjkim
342238405Sjkim	por	%xmm2,%xmm0
343238405Sjkim	leaq	256(%r12),%r12
344238405Sjkim	por	%xmm3,%xmm0
345238405Sjkim
# Word 0: the low word of the sum is dropped, only the carry-out is kept.
346238405Sjkim	mulq	%rbp
347238405Sjkim	addq	%rax,%r10
348238405Sjkim	movq	8(%rsi),%rax
349238405Sjkim	adcq	$0,%rdx
350238405Sjkim	movq	%rdx,%rdi
351238405Sjkim
# Word 1, stored to scratch[0].
352238405Sjkim	mulq	%rbx
353238405Sjkim	addq	%rax,%r11
354238405Sjkim	movq	8(%rcx),%rax
355238405Sjkim	adcq	$0,%rdx
356238405Sjkim	movq	%rdx,%r10
357238405Sjkim
358238405Sjkim	mulq	%rbp
359238405Sjkim	addq	%rax,%rdi
360238405Sjkim	movq	16(%rsi),%rax
361238405Sjkim	adcq	$0,%rdx
362238405Sjkim	addq	%r11,%rdi
363238405Sjkim	leaq	4(%r15),%r15
364238405Sjkim	adcq	$0,%rdx
365238405Sjkim	movq	%rdi,(%rsp)
366238405Sjkim	movq	%rdx,%r13
367238405Sjkim	jmp	.L1st4x
368238405Sjkim.align	16
# First pass, four words per iteration. %r10/%r11 alternate as carriers of
# the (%rsi)[]*%rbx column and %r13/%rdi as carriers of the
# (%rcx)[]*%rbp column; each finished word is stored one slot lower.
369238405Sjkim.L1st4x:
370238405Sjkim	mulq	%rbx
371238405Sjkim	addq	%rax,%r10
372238405Sjkim	movq	-16(%rcx,%r15,8),%rax
373238405Sjkim	adcq	$0,%rdx
374238405Sjkim	movq	%rdx,%r11
375238405Sjkim
376238405Sjkim	mulq	%rbp
377238405Sjkim	addq	%rax,%r13
378238405Sjkim	movq	-8(%rsi,%r15,8),%rax
379238405Sjkim	adcq	$0,%rdx
380238405Sjkim	addq	%r10,%r13
381238405Sjkim	adcq	$0,%rdx
382238405Sjkim	movq	%r13,-24(%rsp,%r15,8)
383238405Sjkim	movq	%rdx,%rdi
384238405Sjkim
385238405Sjkim	mulq	%rbx
386238405Sjkim	addq	%rax,%r11
387238405Sjkim	movq	-8(%rcx,%r15,8),%rax
388238405Sjkim	adcq	$0,%rdx
389238405Sjkim	movq	%rdx,%r10
390238405Sjkim
391238405Sjkim	mulq	%rbp
392238405Sjkim	addq	%rax,%rdi
393238405Sjkim	movq	(%rsi,%r15,8),%rax
394238405Sjkim	adcq	$0,%rdx
395238405Sjkim	addq	%r11,%rdi
396238405Sjkim	adcq	$0,%rdx
397238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)
398238405Sjkim	movq	%rdx,%r13
399238405Sjkim
400238405Sjkim	mulq	%rbx
401238405Sjkim	addq	%rax,%r10
402238405Sjkim	movq	(%rcx,%r15,8),%rax
403238405Sjkim	adcq	$0,%rdx
404238405Sjkim	movq	%rdx,%r11
405238405Sjkim
406238405Sjkim	mulq	%rbp
407238405Sjkim	addq	%rax,%r13
408238405Sjkim	movq	8(%rsi,%r15,8),%rax
409238405Sjkim	adcq	$0,%rdx
410238405Sjkim	addq	%r10,%r13
411238405Sjkim	adcq	$0,%rdx
412238405Sjkim	movq	%r13,-8(%rsp,%r15,8)
413238405Sjkim	movq	%rdx,%rdi
414238405Sjkim
415238405Sjkim	mulq	%rbx
416238405Sjkim	addq	%rax,%r11
417238405Sjkim	movq	8(%rcx,%r15,8),%rax
418238405Sjkim	adcq	$0,%rdx
419238405Sjkim	leaq	4(%r15),%r15
420238405Sjkim	movq	%rdx,%r10
421238405Sjkim
422238405Sjkim	mulq	%rbp
423238405Sjkim	addq	%rax,%rdi
424238405Sjkim	movq	-16(%rsi,%r15,8),%rax
425238405Sjkim	adcq	$0,%rdx
426238405Sjkim	addq	%r11,%rdi
427238405Sjkim	adcq	$0,%rdx
428238405Sjkim	movq	%rdi,-32(%rsp,%r15,8)
429238405Sjkim	movq	%rdx,%r13
430238405Sjkim	cmpq	%r9,%r15
431238405Sjkim	jl	.L1st4x
432238405Sjkim
# Tail of the first pass: the last two words, then reload a[0].
433238405Sjkim	mulq	%rbx
434238405Sjkim	addq	%rax,%r10
435238405Sjkim	movq	-16(%rcx,%r15,8),%rax
436238405Sjkim	adcq	$0,%rdx
437238405Sjkim	movq	%rdx,%r11
438238405Sjkim
439238405Sjkim	mulq	%rbp
440238405Sjkim	addq	%rax,%r13
441238405Sjkim	movq	-8(%rsi,%r15,8),%rax
442238405Sjkim	adcq	$0,%rdx
443238405Sjkim	addq	%r10,%r13
444238405Sjkim	adcq	$0,%rdx
445238405Sjkim	movq	%r13,-24(%rsp,%r15,8)
446238405Sjkim	movq	%rdx,%rdi
447238405Sjkim
448238405Sjkim	mulq	%rbx
449238405Sjkim	addq	%rax,%r11
450238405Sjkim	movq	-8(%rcx,%r15,8),%rax
451238405Sjkim	adcq	$0,%rdx
452238405Sjkim	movq	%rdx,%r10
453238405Sjkim
454238405Sjkim	mulq	%rbp
455238405Sjkim	addq	%rax,%rdi
456238405Sjkim	movq	(%rsi),%rax
457238405Sjkim	adcq	$0,%rdx
458238405Sjkim	addq	%r11,%rdi
459238405Sjkim	adcq	$0,%rdx
460238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)
461238405Sjkim	movq	%rdx,%r13
462238405Sjkim
# Hand-encoded "movq %xmm0,%rbx": %rbx = multiplier word for the next pass.
463238405Sjkim.byte	102,72,15,126,195
464238405Sjkim
# Top word and carry word of the first pass.
465238405Sjkim	xorq	%rdi,%rdi
466238405Sjkim	addq	%r10,%r13
467238405Sjkim	adcq	$0,%rdi
468238405Sjkim	movq	%r13,-8(%rsp,%r15,8)
469238405Sjkim	movq	%rdi,(%rsp,%r15,8)
470238405Sjkim
471238405Sjkim	leaq	1(%r14),%r14
472238405Sjkim.align	4
# Remaining passes: same as the first pass, with the previous scratch
# contents added into every word.
473238405Sjkim.Louter4x:
474238405Sjkim	xorq	%r15,%r15
475238405Sjkim	movq	-96(%r12),%xmm0
476238405Sjkim	movq	-32(%r12),%xmm1
477238405Sjkim	pand	%xmm4,%xmm0
478238405Sjkim	movq	32(%r12),%xmm2
479238405Sjkim	pand	%xmm5,%xmm1
480238405Sjkim
481238405Sjkim	movq	(%rsp),%r10
482238405Sjkim	movq	%r8,%rbp
483238405Sjkim	mulq	%rbx
484238405Sjkim	addq	%rax,%r10
485238405Sjkim	movq	(%rcx),%rax
486238405Sjkim	adcq	$0,%rdx
487238405Sjkim
488238405Sjkim	movq	96(%r12),%xmm3
489238405Sjkim	pand	%xmm6,%xmm2
490238405Sjkim	por	%xmm1,%xmm0
491238405Sjkim	pand	%xmm7,%xmm3
492238405Sjkim
# %rbp = low 64 bits of (%r8 * %r10): reduction multiplier for this pass.
493238405Sjkim	imulq	%r10,%rbp
494238405Sjkim	movq	%rdx,%r11
495238405Sjkim
496238405Sjkim	por	%xmm2,%xmm0
497238405Sjkim	leaq	256(%r12),%r12
498238405Sjkim	por	%xmm3,%xmm0
499238405Sjkim
500238405Sjkim	mulq	%rbp
501238405Sjkim	addq	%rax,%r10
502238405Sjkim	movq	8(%rsi),%rax
503238405Sjkim	adcq	$0,%rdx
504238405Sjkim	movq	%rdx,%rdi
505238405Sjkim
506238405Sjkim	mulq	%rbx
507238405Sjkim	addq	%rax,%r11
508238405Sjkim	movq	8(%rcx),%rax
509238405Sjkim	adcq	$0,%rdx
510238405Sjkim	addq	8(%rsp),%r11
511238405Sjkim	adcq	$0,%rdx
512238405Sjkim	movq	%rdx,%r10
513238405Sjkim
514238405Sjkim	mulq	%rbp
515238405Sjkim	addq	%rax,%rdi
516238405Sjkim	movq	16(%rsi),%rax
517238405Sjkim	adcq	$0,%rdx
518238405Sjkim	addq	%r11,%rdi
519238405Sjkim	leaq	4(%r15),%r15
520238405Sjkim	adcq	$0,%rdx
521238405Sjkim	movq	%rdx,%r13
522238405Sjkim	jmp	.Linner4x
523238405Sjkim.align	16
# Inner loop, four words per iteration. Stores lag one step behind the
# arithmetic: the word finished in the previous step is written while the
# next one is being accumulated.
524238405Sjkim.Linner4x:
525238405Sjkim	mulq	%rbx
526238405Sjkim	addq	%rax,%r10
527238405Sjkim	movq	-16(%rcx,%r15,8),%rax
528238405Sjkim	adcq	$0,%rdx
529238405Sjkim	addq	-16(%rsp,%r15,8),%r10
530238405Sjkim	adcq	$0,%rdx
531238405Sjkim	movq	%rdx,%r11
532238405Sjkim
533238405Sjkim	mulq	%rbp
534238405Sjkim	addq	%rax,%r13
535238405Sjkim	movq	-8(%rsi,%r15,8),%rax
536238405Sjkim	adcq	$0,%rdx
537238405Sjkim	addq	%r10,%r13
538238405Sjkim	adcq	$0,%rdx
539238405Sjkim	movq	%rdi,-32(%rsp,%r15,8)
540238405Sjkim	movq	%rdx,%rdi
541238405Sjkim
542238405Sjkim	mulq	%rbx
543238405Sjkim	addq	%rax,%r11
544238405Sjkim	movq	-8(%rcx,%r15,8),%rax
545238405Sjkim	adcq	$0,%rdx
546238405Sjkim	addq	-8(%rsp,%r15,8),%r11
547238405Sjkim	adcq	$0,%rdx
548238405Sjkim	movq	%rdx,%r10
549238405Sjkim
550238405Sjkim	mulq	%rbp
551238405Sjkim	addq	%rax,%rdi
552238405Sjkim	movq	(%rsi,%r15,8),%rax
553238405Sjkim	adcq	$0,%rdx
554238405Sjkim	addq	%r11,%rdi
555238405Sjkim	adcq	$0,%rdx
556238405Sjkim	movq	%r13,-24(%rsp,%r15,8)
557238405Sjkim	movq	%rdx,%r13
558238405Sjkim
559238405Sjkim	mulq	%rbx
560238405Sjkim	addq	%rax,%r10
561238405Sjkim	movq	(%rcx,%r15,8),%rax
562238405Sjkim	adcq	$0,%rdx
563238405Sjkim	addq	(%rsp,%r15,8),%r10
564238405Sjkim	adcq	$0,%rdx
565238405Sjkim	movq	%rdx,%r11
566238405Sjkim
567238405Sjkim	mulq	%rbp
568238405Sjkim	addq	%rax,%r13
569238405Sjkim	movq	8(%rsi,%r15,8),%rax
570238405Sjkim	adcq	$0,%rdx
571238405Sjkim	addq	%r10,%r13
572238405Sjkim	adcq	$0,%rdx
573238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)
574238405Sjkim	movq	%rdx,%rdi
575238405Sjkim
576238405Sjkim	mulq	%rbx
577238405Sjkim	addq	%rax,%r11
578238405Sjkim	movq	8(%rcx,%r15,8),%rax
579238405Sjkim	adcq	$0,%rdx
580238405Sjkim	addq	8(%rsp,%r15,8),%r11
581238405Sjkim	adcq	$0,%rdx
582238405Sjkim	leaq	4(%r15),%r15
583238405Sjkim	movq	%rdx,%r10
584238405Sjkim
585238405Sjkim	mulq	%rbp
586238405Sjkim	addq	%rax,%rdi
587238405Sjkim	movq	-16(%rsi,%r15,8),%rax
588238405Sjkim	adcq	$0,%rdx
589238405Sjkim	addq	%r11,%rdi
590238405Sjkim	adcq	$0,%rdx
591238405Sjkim	movq	%r13,-40(%rsp,%r15,8)
592238405Sjkim	movq	%rdx,%r13
593238405Sjkim	cmpq	%r9,%r15
594238405Sjkim	jl	.Linner4x
595238405Sjkim
# Tail of the pass: last two words; the pass counter %r14 is bumped here.
596238405Sjkim	mulq	%rbx
597238405Sjkim	addq	%rax,%r10
598238405Sjkim	movq	-16(%rcx,%r15,8),%rax
599238405Sjkim	adcq	$0,%rdx
600238405Sjkim	addq	-16(%rsp,%r15,8),%r10
601238405Sjkim	adcq	$0,%rdx
602238405Sjkim	movq	%rdx,%r11
603238405Sjkim
604238405Sjkim	mulq	%rbp
605238405Sjkim	addq	%rax,%r13
606238405Sjkim	movq	-8(%rsi,%r15,8),%rax
607238405Sjkim	adcq	$0,%rdx
608238405Sjkim	addq	%r10,%r13
609238405Sjkim	adcq	$0,%rdx
610238405Sjkim	movq	%rdi,-32(%rsp,%r15,8)
611238405Sjkim	movq	%rdx,%rdi
612238405Sjkim
613238405Sjkim	mulq	%rbx
614238405Sjkim	addq	%rax,%r11
615238405Sjkim	movq	-8(%rcx,%r15,8),%rax
616238405Sjkim	adcq	$0,%rdx
617238405Sjkim	addq	-8(%rsp,%r15,8),%r11
618238405Sjkim	adcq	$0,%rdx
619238405Sjkim	leaq	1(%r14),%r14
620238405Sjkim	movq	%rdx,%r10
621238405Sjkim
622238405Sjkim	mulq	%rbp
623238405Sjkim	addq	%rax,%rdi
624238405Sjkim	movq	(%rsi),%rax
625238405Sjkim	adcq	$0,%rdx
626238405Sjkim	addq	%r11,%rdi
627238405Sjkim	adcq	$0,%rdx
628238405Sjkim	movq	%r13,-24(%rsp,%r15,8)
629238405Sjkim	movq	%rdx,%r13
630238405Sjkim
# Hand-encoded "movq %xmm0,%rbx": %rbx = multiplier word for the next pass.
631238405Sjkim.byte	102,72,15,126,195
632238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)
633238405Sjkim
# Top word: pending high half plus the previous carry word scratch[count];
# store the new top word and the new carry word.
634238405Sjkim	xorq	%rdi,%rdi
635238405Sjkim	addq	%r10,%r13
636238405Sjkim	adcq	$0,%rdi
637238405Sjkim	addq	(%rsp,%r9,8),%r13
638238405Sjkim	adcq	$0,%rdi
639238405Sjkim	movq	%r13,-8(%rsp,%r15,8)
640238405Sjkim	movq	%rdi,(%rsp,%r15,8)
641238405Sjkim
642238405Sjkim	cmpq	%r9,%r14
643238405Sjkim	jl	.Louter4x
# Final subtraction, four words per iteration.
#   %rdi  = destination pointer recovered from scratch[count+2]
#   %xmm0 = 0, used later to wipe the scratch area
#   %r9   = count / 4 (shifted back before the epilogue)
#   %r15  = count / 4 - 1 loop iterations; the last group is handled after
#           the loop. The dispatcher guarantees count >= 8, so %r15 >= 1.
# The subq starts the borrow chain; every later step uses sbbq, and the
# intervening movq/leaq/decq instructions do not modify CF.
644238405Sjkim	movq	16(%rsp,%r9,8),%rdi
645238405Sjkim	movq	0(%rsp),%rax
646238405Sjkim	pxor	%xmm0,%xmm0
647238405Sjkim	movq	8(%rsp),%rdx
648238405Sjkim	shrq	$2,%r9
649238405Sjkim	leaq	(%rsp),%rsi
650238405Sjkim	xorq	%r14,%r14
651238405Sjkim
652238405Sjkim	subq	0(%rcx),%rax
653238405Sjkim	movq	16(%rsi),%rbx
654238405Sjkim	movq	24(%rsi),%rbp
655238405Sjkim	sbbq	8(%rcx),%rdx
656238405Sjkim	leaq	-1(%r9),%r15
657238405Sjkim	jmp	.Lsub4x
658238405Sjkim.align	16
659238405Sjkim.Lsub4x:
660238405Sjkim	movq	%rax,0(%rdi,%r14,8)
661238405Sjkim	movq	%rdx,8(%rdi,%r14,8)
662238405Sjkim	sbbq	16(%rcx,%r14,8),%rbx
663238405Sjkim	movq	32(%rsi,%r14,8),%rax
664238405Sjkim	movq	40(%rsi,%r14,8),%rdx
665238405Sjkim	sbbq	24(%rcx,%r14,8),%rbp
666238405Sjkim	movq	%rbx,16(%rdi,%r14,8)
667238405Sjkim	movq	%rbp,24(%rdi,%r14,8)
668238405Sjkim	sbbq	32(%rcx,%r14,8),%rax
669238405Sjkim	movq	48(%rsi,%r14,8),%rbx
670238405Sjkim	movq	56(%rsi,%r14,8),%rbp
671238405Sjkim	sbbq	40(%rcx,%r14,8),%rdx
672238405Sjkim	leaq	4(%r14),%r14
673238405Sjkim	decq	%r15
674238405Sjkim	jnz	.Lsub4x
675238405Sjkim
# Last group; the load at 32(%rsi,%r14,8) fetches the carry word
# scratch[count], from which the final borrow is subtracted below.
676238405Sjkim	movq	%rax,0(%rdi,%r14,8)
677238405Sjkim	movq	32(%rsi,%r14,8),%rax
678238405Sjkim	sbbq	16(%rcx,%r14,8),%rbx
679238405Sjkim	movq	%rdx,8(%rdi,%r14,8)
680238405Sjkim	sbbq	24(%rcx,%r14,8),%rbp
681238405Sjkim	movq	%rbx,16(%rdi,%r14,8)
682238405Sjkim
# %rax becomes the selection mask (expected 0 or all-ones -- TODO confirm):
#   %rsi = (scratch & mask) | (%rdi & ~mask)
# so the copy below reads either the unreduced scratch value or the
# subtracted value already stored at the destination, without branching.
683238405Sjkim	sbbq	$0,%rax
684238405Sjkim	movq	%rbp,24(%rdi,%r14,8)
685238405Sjkim	xorq	%r14,%r14
686238405Sjkim	andq	%rax,%rsi
687238405Sjkim	notq	%rax
688238405Sjkim	movq	%rdi,%rcx
689238405Sjkim	andq	%rax,%rcx
690238405Sjkim	leaq	-1(%r9),%r15
691238405Sjkim	orq	%rcx,%rsi
692238405Sjkim
# Copy 16 bytes at a time and zero the scratch area with %xmm0 as we go.
# movdqa is used only on the scratch area, whose base was aligned to 1024
# bytes in the prologue; source and destination use unaligned movdqu.
# In this loop %r14 is a BYTE offset (scale 1, +32 per iteration).
693238405Sjkim	movdqu	(%rsi),%xmm1
694238405Sjkim	movdqa	%xmm0,(%rsp)
695238405Sjkim	movdqu	%xmm1,(%rdi)
696238405Sjkim	jmp	.Lcopy4x
697238405Sjkim.align	16
698238405Sjkim.Lcopy4x:
699238405Sjkim	movdqu	16(%rsi,%r14,1),%xmm2
700238405Sjkim	movdqu	32(%rsi,%r14,1),%xmm1
701238405Sjkim	movdqa	%xmm0,16(%rsp,%r14,1)
702238405Sjkim	movdqu	%xmm2,16(%rdi,%r14,1)
703238405Sjkim	movdqa	%xmm0,32(%rsp,%r14,1)
704238405Sjkim	movdqu	%xmm1,32(%rdi,%r14,1)
705238405Sjkim	leaq	32(%r14),%r14
706238405Sjkim	decq	%r15
707238405Sjkim	jnz	.Lcopy4x
708238405Sjkim
# Restore %r9 to the word count, copy the final 16 bytes, then run the
# epilogue: reload the saved %rsp from scratch[count+1], return 1, restore
# the callee-saved registers and release the frame.
709238405Sjkim	shlq	$2,%r9
710238405Sjkim	movdqu	16(%rsi,%r14,1),%xmm2
711238405Sjkim	movdqa	%xmm0,16(%rsp,%r14,1)
712238405Sjkim	movdqu	%xmm2,16(%rdi,%r14,1)
713238405Sjkim	movq	8(%rsp,%r9,8),%rsi
714238405Sjkim	movq	$1,%rax
715238405Sjkim	movq	(%rsi),%r15
716238405Sjkim	movq	8(%rsi),%r14
717238405Sjkim	movq	16(%rsi),%r13
718238405Sjkim	movq	24(%rsi),%r12
719238405Sjkim	movq	32(%rsi),%rbp
720238405Sjkim	movq	40(%rsi),%rbx
721238405Sjkim	leaq	48(%rsi),%rsp
722238405Sjkim.Lmul4x_epilogue:
# Bytes F3 C3 encode "rep ret".
723238405Sjkim	.byte	0xf3,0xc3
724238405Sjkim.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
# -----------------------------------------------------------------------
# bn_scatter5 -- store a word array into one column of a strided table.
#
# Inputs (System V AMD64 argument order, as used below):
#   %rdi   source word array
#   %rsi   number of 64-bit words to store; 0 returns immediately
#   %rdx   table base address
#   %rcx   column index (scaled by 8 bytes)
# Word i of the source ends up at table + index*8 + i*256.
# Modifies %rax, %rdx, %rsi, %rdi and flags; uses no stack.
# -----------------------------------------------------------------------
725238405Sjkim.globl	bn_scatter5
726238405Sjkim.type	bn_scatter5,@function
727238405Sjkim.align	16
728238405Sjkimbn_scatter5:
# Nothing to do for an empty source.
729238405Sjkim	cmpq	$0,%rsi
730238405Sjkim	jz	.Lscatter_epilogue
# %rdx = address of the first slot of the chosen column.
731238405Sjkim	leaq	(%rdx,%rcx,8),%rdx
732238405Sjkim.Lscatter:
# Copy one word; source advances 8 bytes, destination 256 bytes.
733238405Sjkim	movq	(%rdi),%rax
734238405Sjkim	leaq	8(%rdi),%rdi
735238405Sjkim	movq	%rax,(%rdx)
736238405Sjkim	leaq	256(%rdx),%rdx
737238405Sjkim	subq	$1,%rsi
738238405Sjkim	jnz	.Lscatter
739238405Sjkim.Lscatter_epilogue:
# Bytes F3 C3 encode "rep ret".
740238405Sjkim	.byte	0xf3,0xc3
741238405Sjkim.size	bn_scatter5,.-bn_scatter5
742238405Sjkim
# -----------------------------------------------------------------------
# bn_gather5 -- read one column of the strided table back into a
# contiguous word array (the inverse of bn_scatter5's layout: word i of
# column idx is at table + idx*8 + i*256).
#
# Inputs (System V AMD64 argument order, as used below):
#   %rdi   destination word array
#   %rsi   number of 64-bit words to fetch
#   %rdx   table base address
#   %rcx   column index; only bits 0..4 influence the result
# Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r11, %xmm0-%xmm7 and flags.
#
# NOTE(review): unlike bn_scatter5 there is no guard for a zero count; the
# loop body runs before the count is tested, so %rsi == 0 would wrap
# around. Callers presumably always pass a positive count -- confirm.
# -----------------------------------------------------------------------
743238405Sjkim.globl	bn_gather5
744238405Sjkim.type	bn_gather5,@function
745238405Sjkim.align	16
746238405Sjkimbn_gather5:
# Gather setup.
#   %r11 = idx & 7            -> column offset inside a 64-byte line
#   %rcx = ~(idx >> 3) & 3    -> starting qword k in .Lmagic_masks
#   %rdx = table + 96 + (idx & 7) * 8
# %xmm4..%xmm7 = mask qwords k..k+3. Only qword 3 of the mask table is
# all-ones, so exactly one register is all-ones and it selects the
# candidate at offset ((idx >> 3) & 3) * 64 from the column.
747238405Sjkim	movq	%rcx,%r11
748238405Sjkim	shrq	$3,%rcx
749238405Sjkim	andq	$7,%r11
750238405Sjkim	notq	%rcx
751238405Sjkim	leaq	.Lmagic_masks(%rip),%rax
752238405Sjkim	andq	$3,%rcx
753238405Sjkim	leaq	96(%rdx,%r11,8),%rdx
754238405Sjkim	movq	0(%rax,%rcx,8),%xmm4
755238405Sjkim	movq	8(%rax,%rcx,8),%xmm5
756238405Sjkim	movq	16(%rax,%rcx,8),%xmm6
757238405Sjkim	movq	24(%rax,%rcx,8),%xmm7
758238405Sjkim	jmp	.Lgather
759238405Sjkim.align	16
# Per word: load all four candidates (-96, -32, +32, +96 from %rdx), mask
# each one and OR them together, so the addresses touched do not depend
# on bits 3..4 of the index. Then step 256 bytes to the next word.
760238405Sjkim.Lgather:
761238405Sjkim	movq	-96(%rdx),%xmm0
762238405Sjkim	movq	-32(%rdx),%xmm1
763238405Sjkim	pand	%xmm4,%xmm0
764238405Sjkim	movq	32(%rdx),%xmm2
765238405Sjkim	pand	%xmm5,%xmm1
766238405Sjkim	movq	96(%rdx),%xmm3
767238405Sjkim	pand	%xmm6,%xmm2
768238405Sjkim	por	%xmm1,%xmm0
769238405Sjkim	pand	%xmm7,%xmm3
770238405Sjkim	por	%xmm2,%xmm0
771238405Sjkim	leaq	256(%rdx),%rdx
772238405Sjkim	por	%xmm3,%xmm0
773238405Sjkim
# Store the selected word and advance the destination.
774238405Sjkim	movq	%xmm0,(%rdi)
775238405Sjkim	leaq	8(%rdi),%rdi
776238405Sjkim	subq	$1,%rsi
777238405Sjkim	jnz	.Lgather
# Bytes F3 C3 encode "rep ret".
778238405Sjkim	.byte	0xf3,0xc3
779238405Sjkim.LSEH_end_bn_gather5:
780238405Sjkim.size	bn_gather5,.-bn_gather5
# Selection masks used by the gather code in this file: eight qwords, all
# zero except qword 3, which is all-ones. The gather code loads four
# consecutive qwords starting at qword k (k = 0..3), so exactly one of
# the four masks it loads is all-ones.
781238405Sjkim.align	64
782238405Sjkim.Lmagic_masks:
783238405Sjkim.long	0,0, 0,0, 0,0, -1,-1
784238405Sjkim.long	0,0, 0,0, 0,0,  0,0
# NUL-terminated ASCII identification string: "Montgomery Multiplication
# with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
785238405Sjkim.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
786