# x86_64cpuid.pl revision 325335 (review note: stray header line — commented out so the script stays parseable)
#!/usr/bin/env perl
#
# Preamble: parse the perlasm "flavour" and output path, locate the
# x86_64-xlate.pl translator, and pipe everything this script prints
# through it.

$flavour = shift;
$output  = shift;
# A single argument containing a dot is the output file name, no flavour.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected for nasm/masm/mingw64 flavours.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Find x86_64-xlate.pl next to this script, or in a perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Route STDOUT through the translator; fail loudly if it cannot be started
# (the original silently continued with a broken pipe).
open OUT,"| \"$^X\" $xlate $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Emit: the .init constructor hook that calls OPENSSL_cpuid_setup, plus
# OPENSSL_atomic_add, OPENSSL_rdtsc, OPENSSL_ia32_cpuid (capability probe
# filling the 16-byte OPENSSL_ia32cap_P vector) and OPENSSL_cleanse.
# NOTE(review): only the emitted-comment typo "cleax" was corrected; the
# generated instruction stream is unchanged.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# Emit the Unix (SysV AMD64) OPENSSL_wipe_cpu: zero every volatile register
# (xmm0-15, rcx, rdx, rsi, rdi, r8-r11) and return a pointer just above the
# return address in %rax.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Emit the Win64 OPENSSL_wipe_cpu: only the Microsoft x64 volatile registers
# are cleared (xmm0-5, rcx, rdx, r8-r11); xmm6-15 and rsi/rdi are
# callee-saved on Windows and must be left intact.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

# Emit RDRAND/RDSEED helpers: each retries the instruction up to 8 times
# (CF=1 signals success) and returns 0 in %rax when every attempt failed.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand

.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdseed:
	mov	\$8,%ecx
.Loop_rdseed:
	rdseed	%rax
	jc	.Lbreak_rdseed
	loop	.Loop_rdseed
.Lbreak_rdseed:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___

# Flush and close the pipe to the translator; a failed close means the
# translator exited abnormally or a write was lost, so treat it as fatal.
close STDOUT or die "error closing STDOUT: $!";