/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/x86_64-gf2m.S 305153 2016-08-31 20:33:59Z jkim $ */
/* Do not modify. This file is auto-generated from x86_64-gf2m.pl. */
.text

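/*
 * _mul_1x1: 64x64 -> 128-bit carry-less (GF(2)[x]) multiplication.
 *
 * Inputs:  %rax = a, %rbp = b, %r8 = 0xf (nibble mask, set by the caller).
 * Output:  %rdx:%rax = a * b over GF(2).
 *
 * The top three bits of a are handled separately with sign-extension
 * masks (the sarq/andq block below); the remaining 61 bits are
 * multiplied via a 16-entry stack table indexed by 4-bit windows of b.
 */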
.type	_mul_1x1,@function
.align	16
_mul_1x1:
	subq	$128+8,%rsp
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi
	shrq	$3,%r9
	leaq	(,%rax,4),%rdi
	andq	%rax,%r9
	leaq	(,%rax,8),%r12
	sarq	$63,%rax
	leaq	(%r9,%r9,1),%r10
	sarq	$63,%rsi
	leaq	(,%r9,4),%r11
	andq	%rbp,%rax
	sarq	$63,%rdi
	movq	%rax,%rdx
	shlq	$63,%rax
	andq	%rbp,%rsi
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

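/*
 * Build the lookup table t[i] = a*i (carry-less, with the top three
 * bits of a cleared) for i = 0..15 at 0(%rsp)..120(%rsp), interleaved
 * with extracting the first nibbles of b.
 */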
	movq	%r9,%r13
	movq	$0,0(%rsp)
	xorq	%r10,%r13
	movq	%r9,8(%rsp)
	movq	%r11,%r14
	movq	%r10,16(%rsp)
	xorq	%r12,%r14
	movq	%r13,24(%rsp)

	xorq	%r11,%r9
	movq	%r11,32(%rsp)
	xorq	%r11,%r10
	movq	%r9,40(%rsp)
	xorq	%r11,%r13
	movq	%r10,48(%rsp)
	xorq	%r14,%r9
	movq	%r13,56(%rsp)
	xorq	%r14,%r10

	movq	%r12,64(%rsp)
	xorq	%r14,%r13
	movq	%r9,72(%rsp)
	xorq	%r11,%r9
	movq	%r10,80(%rsp)
	xorq	%r11,%r10
	movq	%r13,88(%rsp)

	xorq	%r11,%r13
	movq	%r14,96(%rsp)
	movq	%r8,%rsi
	movq	%r9,104(%rsp)
	andq	%rbp,%rsi
	movq	%r10,112(%rsp)
	shrq	$4,%rbp
	movq	%r13,120(%rsp)
	movq	%r8,%rdi
	andq	%rbp,%rdi
	shrq	$4,%rbp

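/*
 * Multiply-by-nibble loop: for each 4-bit window of b, fetch t[nibble]
 * and shift it into place.  Odd-numbered nibbles accumulate into
 * %rdx:%rax with 64-bit shifts; even-numbered nibbles accumulate into
 * %xmm0 with byte shifts (pslldq) and are folded in at the end.
 */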
	movq	(%rsp,%rsi,8),%xmm0
	movq	%r8,%rsi
	andq	%rbp,%rsi
	shrq	$4,%rbp
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
.byte	102,72,15,126,198	/* movq %xmm0,%rsi */
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
	xorq	%rsi,%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
	.byte	0xf3,0xc3		/* repz ret */
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1

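/*
 * bn_GF2m_mul_2x2: 2x2 -> 4-word multiplication in GF(2)[x].
 * C prototype (as used by OpenSSL's crypto/bn/bn_gf2m.c):
 *
 *	void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
 *			     BN_ULONG b1, BN_ULONG b0);
 *
 * i.e. %rdi = r, %rsi = a1, %rdx = a0, %rcx = b1, %r8 = b0.  Bit 33 of
 * OPENSSL_ia32cap_P (bit 1 of the second CPUID word, the PCLMULQDQ
 * feature flag) selects between the pclmulqdq path below and the
 * table-driven .Lvanilla_mul_2x2 path.  Both are Karatsuba: three
 * 64x64 carry-less products instead of four.
 */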
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
	movq	OPENSSL_ia32cap_P(%rip),%rax
	btq	$33,%rax
	jnc	.Lvanilla_mul_2x2

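/*
 * PCLMULQDQ path: load a1, b1, a0, b0 into %xmm0-%xmm3, form the three
 * Karatsuba products with pclmulqdq, then split the middle product with
 * 8-byte shifts and store the 256-bit result at (%rdi).  The .byte
 * sequences are raw opcode encodings; they are annotated inline below.
 */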
.byte	102,72,15,110,198	/* movq %rsi,%xmm0 (a1) */
.byte	102,72,15,110,201	/* movq %rcx,%xmm1 (b1) */
.byte	102,72,15,110,210	/* movq %rdx,%xmm2 (a0) */
.byte	102,73,15,110,216	/* movq %r8,%xmm3 (b0) */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0	/* pclmulqdq $0,%xmm1,%xmm0: a1*b1 */
	pxor	%xmm2,%xmm4
	pxor	%xmm3,%xmm5
.byte	102,15,58,68,211,0	/* pclmulqdq $0,%xmm3,%xmm2: a0*b0 */
.byte	102,15,58,68,229,0	/* pclmulqdq $0,%xmm5,%xmm4: (a1^a0)*(b1^b0) */
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4
	psrldq	$8,%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm0
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	.byte	0xf3,0xc3		/* repz ret */

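/*
 * Generic path: save the callee-saved registers and the five
 * arguments, then compute the three Karatsuba products with the
 * table-driven _mul_1x1 above.
 */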
.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
	movq	%r14,80(%rsp)
	movq	%r13,88(%rsp)
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
.Lbody_mul_2x2:
	movq	%rdi,32(%rsp)
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

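	/* a1*b1 -> 16(%rsp), a0*b0 -> 0(%rsp), (a1^a0)*(b1^b0) -> %rdx:%rax */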
	movq	$0xf,%r8
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1
	movq	0(%rsp),%rbx
	movq	8(%rsp),%rcx
	movq	16(%rsp),%rdi
	movq	24(%rsp),%rsi
	movq	32(%rsp),%rbp

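	/*
	 * Karatsuba recombination: r[0] = lo(a0*b0), r[3] = hi(a1*b1),
	 * and the two middle words pick up the middle product xored
	 * with both outer products.
	 */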
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)
	movq	%rax,8(%rbp)

	movq	80(%rsp),%r14
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	.byte	0xf3,0xc3		/* repz ret */
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
/* "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
