# x86_64cpuid.pl revision 325337
#!/usr/bin/env perl
#
# Emit x86_64 CPU-capability probing routines (OPENSSL_ia32_cpuid et al.)
# in perlasm form.  The requested "flavour" (elf, macosx, mingw64, nasm,
# masm, ...) selects the final assembler syntax; the translation itself
# is done by piping everything we print through x86_64-xlate.pl.

$flavour = shift;
$output  = shift;
# A lone argument containing a dot is the output filename (no flavour).
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 ABI is implied by a nasm/masm/mingw64 flavour or an .asm target.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the translator next to this script, or under perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Route all subsequent print()s through the translator.  The open used
# to be unchecked, which silently produced empty output whenever the
# pipe could not be spawned; fail loudly instead.
open OUT,"| \"$^X\" $xlate $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

# First four integer argument registers of the target calling convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
19
# Main assembly payload:
#   - .init section hook that calls OPENSSL_cpuid_setup at load time,
#     and the 16-byte OPENSSL_ia32cap_P capability vector (.comm).
#   - OPENSSL_atomic_add(ptr, inc): lock-cmpxchg spin loop returning the
#     sign-extended new value (the 0xf0 byte is an explicit LOCK prefix,
#     0x48 0x98 is cltq/cdqe).
#   - OPENSSL_rdtsc(): full 64-bit time-stamp counter in %rax.
#   - OPENSSL_ia32_cpuid(&cap): returns %ecx:%edx feature words in %rax
#     with vendor-specific massaging (Intel vs AMD detection, HTT
#     sanity-checking against core counts, AMD XOP isolation, Knights
#     Landing/Mill XSAVE quirk, OSXSAVE/xgetbv gating of AVX/FMA/XOP,
#     extended-feature word stored at 8(%rdi)).
#   - OPENSSL_cleanse(ptr, len): secure zeroization with byte-wise
#     head/tail and 8-byte aligned middle.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___
237
# Unix (SysV AMD64) variant of OPENSSL_wipe_cpu: zero every volatile
# register of that ABI -- all of %xmm0-15 plus the caller-saved GPRs
# %rcx, %rdx, %rsi, %rdi and %r8-%r11.  %rax is not zeroed; it returns
# the address just above the return address (leaq 8(%rsp)), i.e. the
# caller's stack pointer at the call site.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Win64 variant of OPENSSL_wipe_cpu: the Microsoft x64 ABI treats
# %xmm6-15, %rsi and %rdi as callee-saved, so only the volatile subset
# (%xmm0-5, %rcx, %rdx, %r8-%r11) is cleared.  As in the Unix variant,
# %rax returns the address just above the return address.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
292
# OPENSSL_ia32_rdrand / OPENSSL_ia32_rdseed: retry the hardware RNG
# instruction up to 8 times (CF=1 signals a valid result, exiting the
# loop early).  If the delivered value is 0, the cmove substitutes the
# remaining loop counter %rcx; after 8 failures %rcx is 0, so a return
# value of 0 unambiguously means failure.  NOTE(review): this means a
# legitimately-zero random value is replaced by a small nonzero count --
# callers are expected to treat the result as success/failure, not as
# uniformly random when zero was drawn.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand

.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdseed:
	mov	\$8,%ecx
.Loop_rdseed:
	rdseed	%rax
	jc	.Lbreak_rdseed
	loop	.Loop_rdseed
.Lbreak_rdseed:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___
324
# STDOUT is the pipe into x86_64-xlate.pl; close() flushes the buffer
# and reaps the child.  An unchecked close can silently accept a
# truncated/failed translation, so treat any failure as fatal.
close STDOUT or die "error closing STDOUT: $!";
326