/* x86_64cpuid.S revision 1.5 */
1#include <machine/asm.h>
2
/*
 * File preamble.
 *
 * OPENSSL_ia32cap_P is reserved as a 16-byte common symbol with 4-byte
 * alignment and hidden visibility.
 */
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

/*
 * A call to OPENSSL_cpuid_setup is placed in the .init section, so it runs
 * as part of this object's initialisation code.  PIC_PLT comes from
 * <machine/asm.h>.
 */
.globl	OPENSSL_cpuid_setup
.section	.init
	call	PIC_PLT(OPENSSL_cpuid_setup)

/* Everything that follows is ordinary code. */
.text
11
/*
 * OPENSSL_atomic_add
 *
 * Atomically adds a 32-bit amount to the 32-bit integer at (%rdi) using a
 * lock cmpxchg retry loop, and returns the value that was stored.
 * Presumably declared to C as int OPENSSL_atomic_add(int *val, int amount)
 * -- confirm against the header.
 *
 * In:    %rdi = address of the counter, %esi = amount to add
 * Out:   %rax = new counter value, sign-extended from 32 bits
 * Uses:  %r8, flags
 */
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,@function
.p2align	4
OPENSSL_atomic_add:
	movl	(%rdi),%eax			/* eax = value currently observed */
.Latomic_add_retry:
	leaq	(%rsi,%rax),%r8			/* r8d = observed + amount */
	lock cmpxchgl	%r8d,(%rdi)		/* store r8d only if memory still equals eax;
						   on a miss eax is refreshed from memory */
	jne	.Latomic_add_retry
	movl	%r8d,%eax			/* return the value we stored */
	cltq					/* widen the 32-bit result to rax */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
25
/*
 * OPENSSL_rdtsc
 *
 * Executes rdtsc and folds the EDX:EAX pair it delivers into one 64-bit
 * value.
 *
 * In:    nothing
 * Out:   %rax = (EDX << 32) | EAX
 * Uses:  %rdx, flags
 */
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,@function
.p2align	4
OPENSSL_rdtsc:
	rdtsc					/* edx:eax = counter, upper halves zeroed */
	shlq	$32,%rdx			/* move the high word into bits 32..63 */
	orq	%rdx,%rax			/* combine with the low word */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
35
/*
 * OPENSSL_ia32_cpuid
 *
 * Probes the processor with CPUID and returns an adjusted copy of the
 * leaf-1 feature words.  The 32-bit word at offset 8 of the buffer passed
 * in %rdi is cleared first and, on the path that reaches leaf 7, replaced
 * by the EBX value of CPUID leaf 7 / subleaf 0.
 *
 * In:    %rdi = buffer; only the word at 8(%rdi) is written
 * Out:   %rax = (adjusted leaf-1 ECX << 32) | adjusted leaf-1 EDX
 * Uses:  %rcx, %rdx, %r8-%r11, flags; %rbx is saved in %r8 and restored
 *
 * Register roles:
 *   %r8   saved %rbx (cpuid overwrites it and it is callee-saved)
 *   %r11d highest standard CPUID leaf (EAX of leaf 0)
 *   %r9d  0 when the vendor string is "GenuineIntel", non-zero otherwise;
 *         from the merge step on it holds the adjusted ECX word
 *   %r10d scratch for the AMD vendor test, then a sharing/core count,
 *         finally the adjusted EDX word
 *
 * NOTE(review): the AMD-specific path jumps straight to the merge step and
 * never executes the leaf-7 query, so 8(%rdi) stays zero on that path.
 */
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,@function
.p2align	4
OPENSSL_ia32_cpuid:
	movq	%rbx,%r8			/* park callee-saved rbx */

	xorl	%eax,%eax
	movl	%eax,8(%rdi)			/* clear the word at offset 8 */
	cpuid					/* leaf 0: max leaf + vendor string */
	movl	%eax,%r11d			/* r11d = highest standard leaf */

	/* r9d ends up 0 only if EBX:EDX:ECX read "GenuineIntel". */
	xorl	%eax,%eax
	cmpl	$0x756e6547,%ebx		/* "Genu" */
	setne	%al
	movl	%eax,%r9d
	cmpl	$0x49656e69,%edx		/* "ineI" */
	setne	%al
	orl	%eax,%r9d
	cmpl	$0x6c65746e,%ecx		/* "ntel" */
	setne	%al
	orl	%eax,%r9d
	jz	.Lcpuid_intel

	/* r10d ends up 0 only if EBX:EDX:ECX read "AuthenticAMD". */
	cmpl	$0x68747541,%ebx		/* "Auth" */
	setne	%al
	movl	%eax,%r10d
	cmpl	$0x69746e65,%edx		/* "enti" */
	setne	%al
	orl	%eax,%r10d
	cmpl	$0x444d4163,%ecx		/* "cAMD" */
	setne	%al
	orl	%eax,%r10d
	jnz	.Lcpuid_intel			/* neither vendor: use the common path */

	/* AMD only: consult the extended leaves. */
	movl	$0x80000000,%eax
	cpuid					/* eax = highest extended leaf */
	cmpl	$0x80000001,%eax
	jb	.Lcpuid_intel
	movl	%eax,%r10d			/* remember the highest extended leaf */
	movl	$0x80000001,%eax
	cpuid
	orl	%ecx,%r9d
	andl	$0x801,%r9d			/* keep bit 0 and extended-ECX bit 11 */

	cmpl	$0x80000008,%r10d
	jb	.Lcpuid_intel

	movl	$0x80000008,%eax
	cpuid
	movzbq	%cl,%r10			/* r10 = low byte of ECX ... */
	incq	%r10				/* ... plus one */

	movl	$1,%eax
	cpuid
	btl	$28,%edx			/* EDX bit 28 clear: nothing to adjust */
	jnc	.Lcpuid_generic
	shrl	$16,%ebx			/* bl = EBX bits 16..23 */
	cmpb	%r10b,%bl
	ja	.Lcpuid_generic			/* count exceeds r10b: leave bit 28 set */
	andl	$0xefffffff,%edx		/* otherwise clear bit 28 */
	jmp	.Lcpuid_generic

.Lcpuid_intel:
	cmpl	$4,%r11d
	movl	$-1,%r10d			/* default when leaf 4 is unavailable;
						   mov leaves the cmp flags intact */
	jb	.Lcpuid_leaf1

	movl	$4,%eax
	movl	$0,%ecx				/* subleaf 0 */
	cpuid
	movl	%eax,%r10d
	shrl	$14,%r10d
	andl	$0xfff,%r10d			/* r10d = EAX bits 14..25 */

	cmpl	$7,%r11d
	jb	.Lcpuid_leaf1

	movl	$7,%eax
	xorl	%ecx,%ecx			/* subleaf 0 */
	cpuid
	movl	%ebx,8(%rdi)			/* publish leaf-7 EBX */

.Lcpuid_leaf1:
	movl	$1,%eax
	cpuid
	andl	$0xbfefffff,%edx		/* force EDX bits 30 and 20 to zero */
	cmpl	$0,%r9d
	jne	.Lcpuid_notintel
	orl	$0x40000000,%edx		/* Intel vendor: set bit 30 */
	andb	$15,%ah
	cmpb	$15,%ah				/* EAX bits 8..11 all ones? */
	jne	.Lcpuid_notintel
	orl	$0x100000,%edx			/* then also set bit 20 */
.Lcpuid_notintel:
	btl	$28,%edx
	jnc	.Lcpuid_generic
	andl	$0xefffffff,%edx		/* tentatively clear bit 28 */
	cmpl	$0,%r10d
	je	.Lcpuid_generic			/* leaf-4 field was zero: keep it clear */

	orl	$0x10000000,%edx		/* restore bit 28 */
	shrl	$16,%ebx			/* bl = EBX bits 16..23 */
	cmpb	$1,%bl
	ja	.Lcpuid_generic			/* more than one: keep bit 28 */
	andl	$0xefffffff,%edx		/* otherwise clear it again */
.Lcpuid_generic:
	/* r9d = leaf-1 ECX with bit 11 replaced by the bit gathered above. */
	andl	$0x800,%r9d
	andl	$0xfffff7ff,%ecx
	orl	%ecx,%r9d

	movl	%edx,%r10d			/* r9d:r10d now mirror ECX:EDX */
	btl	$27,%r9d			/* ECX bit 27 gates the xgetbv below */
	jnc	.Lcpuid_clear_avx
	xorl	%ecx,%ecx			/* select XCR0 */
	.byte	0x0f,0x01,0xd0			/* xgetbv */
	andl	$6,%eax
	cmpl	$6,%eax				/* XCR0 bits 1 and 2 both set? */
	je	.Lcpuid_done
.Lcpuid_clear_avx:
	movl	$0xefffe7ff,%eax		/* ~(1<<28 | 1<<12 | 1<<11) */
	andl	%eax,%r9d			/* drop those three ECX bits */
	andl	$0xffffffdf,8(%rdi)		/* and bit 5 of the word at offset 8 */
.Lcpuid_done:
	shlq	$32,%r9				/* ECX copy becomes the high half */
	movl	%r10d,%eax			/* EDX copy becomes the low half */
	movq	%r8,%rbx			/* restore callee-saved rbx */
	orq	%r9,%rax
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
166
/*
 * OPENSSL_cleanse
 *
 * Overwrites %rsi bytes starting at %rdi with zero.
 *
 * Lengths below 15 are cleared one byte at a time.  Longer buffers are
 * first advanced byte-wise to an 8-byte boundary, then cleared with 8-byte
 * stores, and any tail shorter than 8 bytes goes through the byte loop.
 *
 * In:    %rdi = start of buffer, %rsi = length in bytes
 * Out:   %rax = 0
 * Uses:  %rdi, %rsi, flags
 */
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,@function
.p2align	4
OPENSSL_cleanse:
	xorl	%eax,%eax			/* rax = 0, the fill pattern */
	cmpq	$15,%rsi
	jae	.Lcleanse_bulk
	testq	%rsi,%rsi
	jz	.Lcleanse_ret			/* zero length: nothing to do */
.Lcleanse_bytes:
	movb	%al,(%rdi)
	subq	$1,%rsi
	leaq	1(%rdi),%rdi			/* lea keeps the flags from subq */
	jnz	.Lcleanse_bytes
.Lcleanse_ret:
	.byte	0xf3,0xc3			/* repz ret */
.p2align	4
.Lcleanse_bulk:
	/* At least 15 bytes: at most 7 are spent reaching alignment,
	   so 8 or more remain for the word loop. */
	testq	$7,%rdi
	jz	.Lcleanse_words
	movb	%al,(%rdi)
	leaq	-1(%rsi),%rsi
	leaq	1(%rdi),%rdi
	jmp	.Lcleanse_bulk
.Lcleanse_words:
	movq	%rax,(%rdi)
	leaq	-8(%rsi),%rsi
	testq	$-8,%rsi			/* non-zero while 8 or more bytes remain */
	leaq	8(%rdi),%rdi			/* lea keeps the flags from testq */
	jnz	.Lcleanse_words
	testq	%rsi,%rsi
	jnz	.Lcleanse_bytes			/* finish a 1..7 byte tail */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
/*
 * OPENSSL_wipe_cpu
 *
 * Zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx, %rsi, %rdi and
 * %r8-%r11, then returns the address 8 bytes above the stack pointer at
 * entry (the location just past the return address).
 *
 * In:    nothing
 * Out:   %rax = %rsp + 8 as seen on entry
 * Uses:  every register it clears, flags
 */
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,@function
.p2align	4
OPENSSL_wipe_cpu:
	/* One pxor per vector register, xmm0 through xmm15. */
	.irp	idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
	pxor	%xmm\idx,%xmm\idx
	.endr
	/* One xorq per integer register in the list. */
	.irp	reg,rcx,rdx,rsi,rdi,r8,r9,r10,r11
	xorq	%\reg,%\reg
	.endr
	leaq	8(%rsp),%rax			/* address just above the return address */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
/*
 * OPENSSL_instrument_bus
 *
 * Fills an array of 32-bit slots with time-stamp-counter deltas.  For each
 * slot the cache line is flushed with clflush and the delta since the
 * previous rdtsc is added to the slot with a locked add, so the values
 * accumulate onto whatever the slot already held.
 * Presumably declared to C as
 * size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
 * -- confirm against the header.
 *
 * In:    %rdi = first slot, %rsi = number of slots
 * Out:   %rax = number of slots processed (equals the count passed in)
 * Uses:  %rcx, %rdx, %r8-%r11, flags
 *
 * A count of zero returns 0 immediately without touching memory.  The
 * previous code performed a locked add on the first slot unconditionally
 * and then decremented the zero counter, which wraps and keeps writing
 * far beyond the buffer.
 */
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,@function
.align	16
OPENSSL_instrument_bus:
	movq	%rdi,%r10			/* r10 = cursor into the slot array */
	movq	%rsi,%rcx			/* rcx = slots still to fill */
	movq	%rsi,%r11			/* r11 = original count, returned at the end */
	testq	%rsi,%rsi
	jz	.Lbus_done			/* empty array: return 0, write nothing */

	rdtsc					/* first tick */
	movl	%eax,%r8d			/* r8d = previous tick */
	movl	$0,%r9d				/* r9d = previous delta (0 to start) */
	clflush	(%r10)
	lock addl	%r9d,(%r10)		/* warm-up access to the first slot, adds 0 */
	jmp	.Lbus_loop
.align	16
.Lbus_loop:
	rdtsc
	movl	%eax,%edx			/* edx = current tick */
	subl	%r8d,%eax			/* eax = delta since the previous tick */
	movl	%edx,%r8d			/* previous tick = current tick */
	movl	%eax,%r9d
	clflush	(%r10)
	lock addl	%eax,(%r10)		/* fold the delta into the slot */
	leaq	4(%r10),%r10			/* next slot; lea leaves flags alone */
	subq	$1,%rcx
	jnz	.Lbus_loop

.Lbus_done:
	movq	%r11,%rax
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
263
/*
 * OPENSSL_instrument_bus2
 *
 * Like OPENSSL_instrument_bus, but advances to the next 32-bit slot only
 * when the measured time-stamp delta differs from the previous one.  Each
 * probe flushes the current slot with clflush and adds the delta to it with
 * a locked add.  Probing stops when either the slot budget or the probe
 * budget is exhausted.
 * Presumably declared to C as
 * size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
 * -- confirm against the header.
 *
 * In:    %rdi = first slot, %rsi = number of slots, %rdx = probe limit
 * Out:   %rax = number of slots the cursor advanced past
 * Uses:  %rcx, %rdx, %r8-%r11, flags; no stack memory is touched
 *
 * Changes from the previous code:
 *  - The original count used to be saved with a store to 8(%rsp).  At
 *    function entry that slot lies above the return address and, under the
 *    System V ABI, belongs to the caller, so the store overwrote caller
 *    data.  %rsi is never modified here, so the count is read back from it.
 *  - A slot count or probe limit of zero now returns 0 at once.  Before,
 *    the counters were decremented through zero and the loop kept writing
 *    far past the buffer.
 */
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,@function
.align	16
OPENSSL_instrument_bus2:
	movq	%rdi,%r10			/* r10 = cursor into the slot array */
	movq	%rsi,%rcx			/* rcx = slots still available */
	movq	%rdx,%r11			/* r11 = probes still allowed */
	testq	%rsi,%rsi
	jz	.Lbus2_done			/* no slots: result is 0 */
	testq	%rdx,%rdx
	jz	.Lbus2_done			/* no probes allowed: result is 0 */

	rdtsc					/* first tick */
	movl	%eax,%r8d			/* r8d = previous tick */
	movl	$0,%r9d				/* r9d = previous delta */

	clflush	(%r10)
	lock addl	%r9d,(%r10)		/* warm-up access, adds 0 */

	rdtsc					/* second tick */
	movl	%eax,%edx
	subl	%r8d,%eax			/* eax = first real delta */
	movl	%edx,%r8d
	movl	%eax,%r9d
.Lbus2_loop:
	clflush	(%r10)
	lock addl	%eax,(%r10)		/* fold the delta into the current slot */

	subq	$1,%r11
	jz	.Lbus2_done			/* probe budget used up */

	rdtsc
	movl	%eax,%edx
	subl	%r8d,%eax			/* eax = new delta */
	movl	%edx,%r8d
	cmpl	%r9d,%eax			/* did the delta change? */
	movl	%eax,%r9d
	movl	$0,%edx				/* mov, not xor: the cmp flags feed setne */
	setne	%dl				/* rdx = 1 if it changed, else 0 */
	subq	%rdx,%rcx			/* consume a slot only on a change */
	leaq	(%r10,%rdx,4),%r10		/* advance the cursor likewise; flags kept */
	jnz	.Lbus2_loop			/* continue while slots remain */

.Lbus2_done:
	movq	%rsi,%rax			/* original slot count, still intact */
	subq	%rcx,%rax			/* minus the slots never reached */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
/*
 * OPENSSL_ia32_rdrand
 *
 * Tries rdrand up to eight times and returns the first value delivered
 * with the carry flag set.
 *
 * In:    nothing
 * Out:   %rax = value from rdrand.  If that value is zero, the remaining
 *        retry count in %rcx is returned in its place; that count is zero
 *        once all eight attempts have failed.
 * Uses:  %rcx, flags
 */
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,@function
.p2align	4
OPENSSL_ia32_rdrand:
	movl	$8,%ecx				/* attempt budget */
.Lrdrand_retry:
	.byte	0x48,0x0f,0xc7,0xf0		/* rdrand %rax, byte-encoded */
	jc	.Lrdrand_out			/* CF set: a value was delivered */
	loop	.Lrdrand_retry			/* decrement rcx, retry while non-zero */
.Lrdrand_out:
	testq	%rax,%rax
	cmoveq	%rcx,%rax			/* zero result: hand back rcx instead */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
325
/*
 * OPENSSL_ia32_rdseed
 *
 * Tries rdseed up to eight times and returns the first value delivered
 * with the carry flag set.
 *
 * In:    nothing
 * Out:   %rax = value from rdseed.  If that value is zero, the remaining
 *        retry count in %rcx is returned in its place; that count is zero
 *        once all eight attempts have failed.
 * Uses:  %rcx, flags
 *
 * The instruction is emitted as raw bytes, matching OPENSSL_ia32_rdrand and
 * the xgetbv in OPENSSL_ia32_cpuid, so the file still assembles with
 * toolchains that do not know the rdseed mnemonic.
 */
.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,@function
.align	16
OPENSSL_ia32_rdseed:
	movl	$8,%ecx				/* attempt budget */
.Lrdseed_retry:
	.byte	0x48,0x0f,0xc7,0xf8		/* rdseed %rax (REX.W 0F C7 /7) */
	jc	.Lrdseed_out			/* CF set: a value was delivered */
	loop	.Lrdseed_retry			/* decrement rcx, retry while non-zero */
.Lrdseed_out:
	cmpq	$0,%rax
	cmoveq	%rcx,%rax			/* zero result: hand back rcx instead */
	.byte	0xf3,0xc3			/* repz ret */
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
340