/* x86_64cpuid.S revision 1.11 */
1#include <machine/asm.h>
2
3.globl	OPENSSL_cpuid_setup
4.section	.init
5	call	PIC_PLT(OPENSSL_cpuid_setup)
6
7.hidden	OPENSSL_ia32cap_P
8.comm	OPENSSL_ia32cap_P,16,4
9
10.text
11
12.globl	OPENSSL_atomic_add
13.type	OPENSSL_atomic_add,@function
14.align	16
15OPENSSL_atomic_add:
16	movl	(%rdi),%eax
17.Lspin:	leaq	(%rsi,%rax,1),%r8
18.byte	0xf0
19	cmpxchgl	%r8d,(%rdi)
20	jne	.Lspin
21	movl	%r8d,%eax
22.byte	0x48,0x98
23	.byte	0xf3,0xc3
24.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
25
26.globl	OPENSSL_rdtsc
27.type	OPENSSL_rdtsc,@function
28.align	16
29OPENSSL_rdtsc:
30	rdtsc
31	shlq	$32,%rdx
32	orq	%rdx,%rax
33	.byte	0xf3,0xc3
34.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
35
36.globl	OPENSSL_ia32_cpuid
37.type	OPENSSL_ia32_cpuid,@function
38.align	16
39OPENSSL_ia32_cpuid:
40.cfi_startproc
41	movq	%rbx,%r8
42.cfi_register	%rbx,%r8
43
44	xorl	%eax,%eax
45	movq	%rax,8(%rdi)
46	cpuid
47	movl	%eax,%r11d
48
49	xorl	%eax,%eax
50	cmpl	$0x756e6547,%ebx
51	setne	%al
52	movl	%eax,%r9d
53	cmpl	$0x49656e69,%edx
54	setne	%al
55	orl	%eax,%r9d
56	cmpl	$0x6c65746e,%ecx
57	setne	%al
58	orl	%eax,%r9d
59	jz	.Lintel
60
61	cmpl	$0x68747541,%ebx
62	setne	%al
63	movl	%eax,%r10d
64	cmpl	$0x69746E65,%edx
65	setne	%al
66	orl	%eax,%r10d
67	cmpl	$0x444D4163,%ecx
68	setne	%al
69	orl	%eax,%r10d
70	jnz	.Lintel
71
72
73	movl	$0x80000000,%eax
74	cpuid
75	cmpl	$0x80000001,%eax
76	jb	.Lintel
77	movl	%eax,%r10d
78	movl	$0x80000001,%eax
79	cpuid
80	orl	%ecx,%r9d
81	andl	$0x00000801,%r9d
82
83	cmpl	$0x80000008,%r10d
84	jb	.Lintel
85
86	movl	$0x80000008,%eax
87	cpuid
88	movzbq	%cl,%r10
89	incq	%r10
90
91	movl	$1,%eax
92	cpuid
93	btl	$28,%edx
94	jnc	.Lgeneric
95	shrl	$16,%ebx
96	cmpb	%r10b,%bl
97	ja	.Lgeneric
98	andl	$0xefffffff,%edx
99	jmp	.Lgeneric
100
101.Lintel:
102	cmpl	$4,%r11d
103	movl	$-1,%r10d
104	jb	.Lnocacheinfo
105
106	movl	$4,%eax
107	movl	$0,%ecx
108	cpuid
109	movl	%eax,%r10d
110	shrl	$14,%r10d
111	andl	$0xfff,%r10d
112
113.Lnocacheinfo:
114	movl	$1,%eax
115	cpuid
116	movd	%eax,%xmm0
117	andl	$0xbfefffff,%edx
118	cmpl	$0,%r9d
119	jne	.Lnotintel
120	orl	$0x40000000,%edx
121	andb	$15,%ah
122	cmpb	$15,%ah
123	jne	.LnotP4
124	orl	$0x00100000,%edx
125.LnotP4:
126	cmpb	$6,%ah
127	jne	.Lnotintel
128	andl	$0x0fff0ff0,%eax
129	cmpl	$0x00050670,%eax
130	je	.Lknights
131	cmpl	$0x00080650,%eax
132	jne	.Lnotintel
133.Lknights:
134	andl	$0xfbffffff,%ecx
135
136.Lnotintel:
137	btl	$28,%edx
138	jnc	.Lgeneric
139	andl	$0xefffffff,%edx
140	cmpl	$0,%r10d
141	je	.Lgeneric
142
143	orl	$0x10000000,%edx
144	shrl	$16,%ebx
145	cmpb	$1,%bl
146	ja	.Lgeneric
147	andl	$0xefffffff,%edx
148.Lgeneric:
149	andl	$0x00000800,%r9d
150	andl	$0xfffff7ff,%ecx
151	orl	%ecx,%r9d
152
153	movl	%edx,%r10d
154
155	cmpl	$7,%r11d
156	jb	.Lno_extended_info
157	movl	$7,%eax
158	xorl	%ecx,%ecx
159	cpuid
160	btl	$26,%r9d
161	jc	.Lnotknights
162	andl	$0xfff7ffff,%ebx
163.Lnotknights:
164	movd	%xmm0,%eax
165	andl	$0x0fff0ff0,%eax
166	cmpl	$0x00050650,%eax
167	jne	.Lnotskylakex
168	andl	$0xfffeffff,%ebx
169
170.Lnotskylakex:
171	movl	%ebx,8(%rdi)
172	movl	%ecx,12(%rdi)
173.Lno_extended_info:
174
175	btl	$27,%r9d
176	jnc	.Lclear_avx
177	xorl	%ecx,%ecx
178.byte	0x0f,0x01,0xd0
179	andl	$0xe6,%eax
180	cmpl	$0xe6,%eax
181	je	.Ldone
182	andl	$0x3fdeffff,8(%rdi)
183
184
185
186
187	andl	$6,%eax
188	cmpl	$6,%eax
189	je	.Ldone
190.Lclear_avx:
191	movl	$0xefffe7ff,%eax
192	andl	%eax,%r9d
193	movl	$0x3fdeffdf,%eax
194	andl	%eax,8(%rdi)
195.Ldone:
196	shlq	$32,%r9
197	movl	%r10d,%eax
198	movq	%r8,%rbx
199.cfi_restore	%rbx
200	orq	%r9,%rax
201	.byte	0xf3,0xc3
202.cfi_endproc
203.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
204
205.globl	OPENSSL_cleanse
206.type	OPENSSL_cleanse,@function
207.align	16
208OPENSSL_cleanse:
209	xorq	%rax,%rax
210	cmpq	$15,%rsi
211	jae	.Lot
212	cmpq	$0,%rsi
213	je	.Lret
214.Little:
215	movb	%al,(%rdi)
216	subq	$1,%rsi
217	leaq	1(%rdi),%rdi
218	jnz	.Little
219.Lret:
220	.byte	0xf3,0xc3
221.align	16
222.Lot:
223	testq	$7,%rdi
224	jz	.Laligned
225	movb	%al,(%rdi)
226	leaq	-1(%rsi),%rsi
227	leaq	1(%rdi),%rdi
228	jmp	.Lot
229.Laligned:
230	movq	%rax,(%rdi)
231	leaq	-8(%rsi),%rsi
232	testq	$-8,%rsi
233	leaq	8(%rdi),%rdi
234	jnz	.Laligned
235	cmpq	$0,%rsi
236	jne	.Little
237	.byte	0xf3,0xc3
238.size	OPENSSL_cleanse,.-OPENSSL_cleanse
239
240.globl	CRYPTO_memcmp
241.type	CRYPTO_memcmp,@function
242.align	16
243CRYPTO_memcmp:
244	xorq	%rax,%rax
245	xorq	%r10,%r10
246	cmpq	$0,%rdx
247	je	.Lno_data
248	cmpq	$16,%rdx
249	jne	.Loop_cmp
250	movq	(%rdi),%r10
251	movq	8(%rdi),%r11
252	movq	$1,%rdx
253	xorq	(%rsi),%r10
254	xorq	8(%rsi),%r11
255	orq	%r11,%r10
256	cmovnzq	%rdx,%rax
257	.byte	0xf3,0xc3
258
259.align	16
260.Loop_cmp:
261	movb	(%rdi),%r10b
262	leaq	1(%rdi),%rdi
263	xorb	(%rsi),%r10b
264	leaq	1(%rsi),%rsi
265	orb	%r10b,%al
266	decq	%rdx
267	jnz	.Loop_cmp
268	negq	%rax
269	shrq	$63,%rax
270.Lno_data:
271	.byte	0xf3,0xc3
272.size	CRYPTO_memcmp,.-CRYPTO_memcmp
273.globl	OPENSSL_wipe_cpu
274.type	OPENSSL_wipe_cpu,@function
275.align	16
276OPENSSL_wipe_cpu:
277	pxor	%xmm0,%xmm0
278	pxor	%xmm1,%xmm1
279	pxor	%xmm2,%xmm2
280	pxor	%xmm3,%xmm3
281	pxor	%xmm4,%xmm4
282	pxor	%xmm5,%xmm5
283	pxor	%xmm6,%xmm6
284	pxor	%xmm7,%xmm7
285	pxor	%xmm8,%xmm8
286	pxor	%xmm9,%xmm9
287	pxor	%xmm10,%xmm10
288	pxor	%xmm11,%xmm11
289	pxor	%xmm12,%xmm12
290	pxor	%xmm13,%xmm13
291	pxor	%xmm14,%xmm14
292	pxor	%xmm15,%xmm15
293	xorq	%rcx,%rcx
294	xorq	%rdx,%rdx
295	xorq	%rsi,%rsi
296	xorq	%rdi,%rdi
297	xorq	%r8,%r8
298	xorq	%r9,%r9
299	xorq	%r10,%r10
300	xorq	%r11,%r11
301	leaq	8(%rsp),%rax
302	.byte	0xf3,0xc3
303.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
304.globl	OPENSSL_instrument_bus
305.type	OPENSSL_instrument_bus,@function
306.align	16
307OPENSSL_instrument_bus:
308	movq	%rdi,%r10
309	movq	%rsi,%rcx
310	movq	%rsi,%r11
311
312	rdtsc
313	movl	%eax,%r8d
314	movl	$0,%r9d
315	clflush	(%r10)
316.byte	0xf0
317	addl	%r9d,(%r10)
318	jmp	.Loop
319.align	16
320.Loop:	rdtsc
321	movl	%eax,%edx
322	subl	%r8d,%eax
323	movl	%edx,%r8d
324	movl	%eax,%r9d
325	clflush	(%r10)
326.byte	0xf0
327	addl	%eax,(%r10)
328	leaq	4(%r10),%r10
329	subq	$1,%rcx
330	jnz	.Loop
331
332	movq	%r11,%rax
333	.byte	0xf3,0xc3
334.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
335
336.globl	OPENSSL_instrument_bus2
337.type	OPENSSL_instrument_bus2,@function
338.align	16
339OPENSSL_instrument_bus2:
340	movq	%rdi,%r10
341	movq	%rsi,%rcx
342	movq	%rdx,%r11
343	movq	%rcx,8(%rsp)
344
345	rdtsc
346	movl	%eax,%r8d
347	movl	$0,%r9d
348
349	clflush	(%r10)
350.byte	0xf0
351	addl	%r9d,(%r10)
352
353	rdtsc
354	movl	%eax,%edx
355	subl	%r8d,%eax
356	movl	%edx,%r8d
357	movl	%eax,%r9d
358.Loop2:
359	clflush	(%r10)
360.byte	0xf0
361	addl	%eax,(%r10)
362
363	subq	$1,%r11
364	jz	.Ldone2
365
366	rdtsc
367	movl	%eax,%edx
368	subl	%r8d,%eax
369	movl	%edx,%r8d
370	cmpl	%r9d,%eax
371	movl	%eax,%r9d
372	movl	$0,%edx
373	setne	%dl
374	subq	%rdx,%rcx
375	leaq	(%r10,%rdx,4),%r10
376	jnz	.Loop2
377
378.Ldone2:
379	movq	8(%rsp),%rax
380	subq	%rcx,%rax
381	.byte	0xf3,0xc3
382.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
383.globl	OPENSSL_ia32_rdrand_bytes
384.type	OPENSSL_ia32_rdrand_bytes,@function
385.align	16
386OPENSSL_ia32_rdrand_bytes:
387	xorq	%rax,%rax
388	cmpq	$0,%rsi
389	je	.Ldone_rdrand_bytes
390
391	movq	$8,%r11
392.Loop_rdrand_bytes:
393.byte	73,15,199,242
394	jc	.Lbreak_rdrand_bytes
395	decq	%r11
396	jnz	.Loop_rdrand_bytes
397	jmp	.Ldone_rdrand_bytes
398
399.align	16
400.Lbreak_rdrand_bytes:
401	cmpq	$8,%rsi
402	jb	.Ltail_rdrand_bytes
403	movq	%r10,(%rdi)
404	leaq	8(%rdi),%rdi
405	addq	$8,%rax
406	subq	$8,%rsi
407	jz	.Ldone_rdrand_bytes
408	movq	$8,%r11
409	jmp	.Loop_rdrand_bytes
410
411.align	16
412.Ltail_rdrand_bytes:
413	movb	%r10b,(%rdi)
414	leaq	1(%rdi),%rdi
415	incq	%rax
416	shrq	$8,%r10
417	decq	%rsi
418	jnz	.Ltail_rdrand_bytes
419
420.Ldone_rdrand_bytes:
421	xorq	%r10,%r10
422	.byte	0xf3,0xc3
423.size	OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
424.globl	OPENSSL_ia32_rdseed_bytes
425.type	OPENSSL_ia32_rdseed_bytes,@function
426.align	16
427OPENSSL_ia32_rdseed_bytes:
428	xorq	%rax,%rax
429	cmpq	$0,%rsi
430	je	.Ldone_rdseed_bytes
431
432	movq	$8,%r11
433.Loop_rdseed_bytes:
434.byte	73,15,199,250
435	jc	.Lbreak_rdseed_bytes
436	decq	%r11
437	jnz	.Loop_rdseed_bytes
438	jmp	.Ldone_rdseed_bytes
439
440.align	16
441.Lbreak_rdseed_bytes:
442	cmpq	$8,%rsi
443	jb	.Ltail_rdseed_bytes
444	movq	%r10,(%rdi)
445	leaq	8(%rdi),%rdi
446	addq	$8,%rax
447	subq	$8,%rsi
448	jz	.Ldone_rdseed_bytes
449	movq	$8,%r11
450	jmp	.Loop_rdseed_bytes
451
452.align	16
453.Ltail_rdseed_bytes:
454	movb	%r10b,(%rdi)
455	leaq	1(%rdi),%rdi
456	incq	%rax
457	shrq	$8,%r10
458	decq	%rsi
459	jnz	.Ltail_rdseed_bytes
460
461.Ldone_rdseed_bytes:
462	xorq	%r10,%r10
463	.byte	0xf3,0xc3
464.size	OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes
465