#!/usr/bin/env perl
#
# perlasm generator for the x86_64 CPU-identification and utility routines.
#
# Usage: perl <script> [flavour] [output]
#
# The assembly text printed below is written in AT&T syntax and piped
# through x86_64-xlate.pl, which is handed the flavour and output name.

$flavour = shift;
$output  = shift;
# A first argument containing a dot is taken to be the output file name,
# i.e. the flavour was omitted.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected for nasm/masm/mingw64 flavours or
# when the output file ends in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in a perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# The translator path is quoted so that directories containing spaces work.
# $output is intentionally left unquoted: when it is empty no argument is
# passed at all, whereas a quoted empty string would be passed as an
# explicit (empty) file name.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments; used by the
# routines below that are declared \@abi-omnipotent.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

print<<___;
# Emit a call to OPENSSL_cpuid_setup into the .init section so that it
# runs as part of the image's initialization code.  The symbol is
# external to this file and given hidden visibility.
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

# Capability vector: a common symbol of 16 bytes with 4-byte alignment,
# also hidden.
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

#
# OPENSSL_atomic_add: atomically add the second argument to the 32-bit
# word addressed by the first argument.
#
# In:    arg1 = address of the 32-bit word, arg2 = amount to add
# Out:   %rax = the updated value, sign-extended from 32 to 64 bits
# Clobb: %r8, flags
#
# Implemented as a compare-and-swap retry loop.
#
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# %eax = value we expect to find
.Lspin:
	leaq	($arg2,%rax),%r8	# %r8d = expected + amount
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)	# store if unchanged, else reload %eax
	jne	.Lspin			# word changed under us: retry
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

#
# OPENSSL_rdtsc: return the time-stamp counter as one 64-bit value.
#
# Out:   %rax = counter; rdtsc delivers it split across %edx:%eax and
#        the two halves are merged here
# Clobb: %rdx, flags
#
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx		# move the high half into place
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

#
# OPENSSL_ia32_cpuid: query CPUID and build the capability words.
#
# In:    %rdi = pointer to a buffer; the 32-bit word at offset 8 is
#        written here: cleared on entry, then set to %ebx of CPUID
#        leaf 7 when that leaf exists.  The routine is declared as a
#        one-argument function rather than abi-omnipotent, presumably
#        so the translator can map the argument onto %rdi on every
#        target -- confirm against x86_64-xlate.pl.
# Out:   %rax = adjusted leaf-1 %ecx in the upper 32 bits,
#               adjusted leaf-1 %edx in the lower 32 bits
#
# Register roles:
#   %r8    copy of the caller's %rbx (cpuid overwrites %ebx)
#   %r11d  highest standard CPUID leaf
#   %r9d   vendor check result, 0 only for GenuineIntel; on the AMD
#          path bit 11 additionally carries the XOP flag; from
#          .Lgeneric on it holds the %ecx feature word
#   %r10d  AMD vendor check result, then core-count scratch; from
#          .Lgeneric on it holds the %edx feature word
#
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d		# max value for extended query level
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11 (bit 0 keeps the non-Intel mark)

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# mov leaves the flags from cmp intact
	jb	.Lnocacheinfo

	mov	\$4,%eax
	xor	%ecx,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

	cmp	\$7,%r11d
	jb	.Lnocacheinfo

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	test	%r9d,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	test	%r10d,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

#
# OPENSSL_cleanse: overwrite a buffer with zero bytes.
#
# In:    arg1 = start address, arg2 = length in bytes (0 is allowed)
# Out:   nothing meaningful; %rax is zero on return
# Clobb: %rax, both argument registers, flags
#
# Lengths below 15 are cleared one byte at a time.  Longer buffers are
# first advanced byte-wise to an 8-byte boundary, then cleared eight
# bytes per store, and any tail shorter than 8 bytes is handed back to
# the byte loop.
#
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%eax,%eax		# zero source for every store below
	cmp	\$15,$arg2
	jae	.Lot
	test	$arg2,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags from sub for jnz
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1		# 8-byte aligned yet?
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# at least 8 bytes still to go?
	lea	8($arg1),$arg1		# lea keeps the flags from test for jnz
	jnz	.Laligned
	test	$arg2,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# Non-Win64 variant: zero %xmm0-%xmm15 and the integer registers
# %rcx, %rdx, %rsi, %rdi, %r8-%r11.
print<<___ if (!$win64);
#
# OPENSSL_wipe_cpu: zero the scratch registers listed below.
#
# Out:   %rax = %rsp + 8 as seen inside the routine, i.e. the address
#        just above the return address
# Clobb: %xmm0-%xmm15, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags
#
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Win64 variant: only %xmm0-%xmm5 and %rcx, %rdx, %r8-%r11 are zeroed.
# %xmm6-%xmm15, %rsi and %rdi are callee-saved under the Microsoft x64
# convention and are left untouched.
print<<___ if ($win64);
#
# OPENSSL_wipe_cpu: zero the scratch registers listed below.
#
# Out:   %rax = %rsp + 8 as seen inside the routine, i.e. the address
#        just above the return address
# Clobb: %xmm0-%xmm5, %rcx, %rdx, %r8-%r11, flags
#
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

print<<___;
#
# OPENSSL_ia32_rdrand: fetch a 64-bit value with rdrand, making at most
# 8 attempts.
#
# Out:   %rax = the value delivered by rdrand when it is non-zero.
#        If %rax is zero after the loop it is replaced by %rcx, the
#        attempts counter: that is non-zero when rdrand succeeded with
#        a zero value, and 0 once all 8 attempts have been used up.
#        So 0 signals failure, assuming rdrand leaves zero in its
#        destination when it fails (the Intel SDM specifies this;
#        verify for other vendors).
# Clobb: %rcx, flags
#
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx		# attempts left
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand		# CF=1: value is valid
	loop	.Loop_rdrand		# counts %rcx down without touching flags
.Lbreak_rdrand:
	test	%rax,%rax
	cmove	%rcx,%rax		# never hand out a genuine zero
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand

#
# OPENSSL_ia32_rdseed: fetch a 64-bit value with rdseed, making at most
# 8 attempts.
#
# Out:   %rax = the value delivered by rdseed when it is non-zero.
#        If %rax is zero after the loop it is replaced by %rcx, the
#        attempts counter: that is non-zero when rdseed succeeded with
#        a zero value, and 0 once all 8 attempts have been used up.
#        So 0 signals failure, assuming rdseed leaves zero in its
#        destination when it fails (the Intel SDM specifies this;
#        verify for other vendors).
# Clobb: %rcx, flags
#
.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdseed:
	mov	\$8,%ecx		# attempts left
.Loop_rdseed:
	rdseed	%rax
	jc	.Lbreak_rdseed		# CF=1: value is valid
	loop	.Loop_rdseed		# counts %rcx down without touching flags
.Lbreak_rdseed:
	test	%rax,%rax
	cmove	%rcx,%rax		# never hand out a genuine zero
	ret
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___

# STDOUT is the pipe to the translator: closing it flushes the generated
# text and waits for the child, so a false result here means the write
# failed or the translator exited with a non-zero status.
close STDOUT or die "error closing STDOUT: $! (child status $?)";	# flush