#!/usr/bin/env perl
#
# Generator for the "x86cpuid" assembly module.  It relies on the perlasm
# emitters pulled in from x86asm.pl; $ARGV[0] is handed to asm_init()
# and every command-line argument is scanned for compiler defines.

push(@INC,"perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],"x86cpuid");

# $sse2 gates the xmm register wipe emitted in OPENSSL_wipe_cpu; it is
# set only when -DOPENSSL_IA32_SSE2 shows up among the arguments.
$sse2=1 if (grep { /-DOPENSSL_IA32_SSE2/ } @ARGV);

# OPENSSL_ia32_cpuid: query the processor through CPUID and return the
# (slightly adjusted) edx feature word in eax and the ecx word in edx.
# When the EFLAGS probe fails CPUID is never executed and eax is zero.
&function_begin("OPENSSL_ia32_cpuid");
	{
	my $id_bit	= 21;		# EFLAGS bit flipped to probe for CPUID
	my $family	= 15<<8;	# family ID field; all bits set means P4
	my $rc4_char	= 1<<20;	# reserved bit used to engage RC4_CHAR
	my $htt_bit	= 28;		# hyper-threading bit
	my $htt_clear	= 0xefffffff;	# mask with only the hyper-threading bit off

	# Try to toggle the probe bit in EFLAGS; if the change does not
	# stick, bail out with edx still zero.
	&xor	("edx","edx");
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");		# remember the flags as they were
	&xor	("eax",1<<$id_bit);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# ecx = bits that really changed
	&bt	("ecx",$id_bit);
	&jnc	(&label("done"));

	# CPUID with eax=0: compare the vendor string with "GenuineIntel".
	# Each data_byte() below is a hand-encoded setne on the low byte
	# of eax, so ebp stays zero only if all three words match.
	&xor	("eax","eax");
	&cpuid	();
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&data_byte(0x0f,0x95,0xc0);	# &setne(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&data_byte(0x0f,0x95,0xc0);	# &setne(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&data_byte(0x0f,0x95,0xc0);	# &setne(&LB("eax"));
	&or	("ebp","eax");

	# CPUID with eax=1 (ecx cleared beforehand): feature words.
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&cmp	("ebp",0);		# vendor mismatch, cannot be a P4
	&jne	(&label("notP4"));
	&and	("eax",$family);	# family ID
	&cmp	("eax",$family);	# P4?
	&jne	(&label("notP4"));
	&or	("edx",$rc4_char);	# use reserved bit to engage RC4_CHAR
&set_label("notP4");
	&bt	("edx",$htt_bit);	# test hyper-threading bit
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx",1);		# see if cache is shared(*)
	&ja	(&label("done"));
	&and	("edx",$htt_clear);	# clear hyper-threading bit if not
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
	}
&function_end("OPENSSL_ia32_cpuid");
# (*)	on Core2 this value is set to 2 denoting the fact that L2
#	cache is shared between cores.

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc: emit rdtsc when bit 4 (the TSC bit) of the word at
# OPENSSL_ia32cap_P is set; otherwise return with eax and edx zeroed.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	{
	my $tsc_bit = 4;

	# Preload the "no counter" result.
	&xor	("eax","eax");
	&xor	("edx","edx");

	&picmeup("ecx","OPENSSL_ia32cap_P");	# ecx = address of the word
	&bt	(&DWP(0,"ecx"),$tsc_bit);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
	}
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# The result in edx:eax is the difference between two rdtsc readings
# taken around &halt().  Zero is returned instead when the TSC bit of
# OPENSSL_ia32cap_P is clear, when the two low bits of %cs are not zero,
# or when bit 9 of EFLAGS shows that interrupts are disabled.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	# Each emitter call is its own statement: without the terminating
	# semicolon the next call would be parsed as the right-hand side
	# of a binary '&' expression.
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# first reading: high word at 4(esp),
	&push	("eax");		# low word at 0(esp)
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: second - first
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the saved first reading
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# The first stack argument is loaded into %ds and the second is the
# offset of the 32-bit word that is polled.  eax returns the number of
# loop iterations until that word changes, or zero (with edx zeroed as
# well) when bit 9 of EFLAGS shows that interrupts are disabled.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	# Each emitter call is its own statement: without the terminating
	# semicolon the next call would be parsed as the right-hand side
	# of a binary '&' expression.
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));	# selector, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# offset, 2nd arg
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");		# spin counter
	&mov	("edx",&DWP(0,"ecx"));	# reference value of the polled word
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));	# keep going while the word is unchanged

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu: zero eax and edx, then, depending on the bits found
# in OPENSSL_ia32cap_P, the xmm registers (only when the generator was
# run with -DOPENSSL_IA32_SSE2) and the fp/mm bank.  eax returns esp+4.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));
	# From here on ecx holds the capability word itself, not its
	# address, so the bits have to be tested in the register; going
	# through DWP(0,"ecx") would use the capability bits as a pointer.
	# NOTE(review): bit 1 is what the x87 test has always checked
	# here - confirm against the OPENSSL_ia32cap_P bit layout.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&bt	("ecx",26);
		&jnc	(&label("no_sse2"));
		foreach my $n (0..7) {
			&pxor	("xmm$n","xmm$n");
		}
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add: atomically add the second stack argument to the
# 32-bit word the first stack argument points at, and return the new
# value in eax.
&function_begin_B("OPENSSL_atomic_add");
	{
	# Register roles.  The hand-encoded cmpxchg below is tied to
	# exactly these registers, so they cannot be changed freely.
	my ($ptr,$delta,$seen,$want) = ("edx","ecx","eax","ebx");

	&mov	($ptr,&DWP(4,"esp"));	# 1st arg: the pointer
	&mov	($delta,&DWP(8,"esp"));	# 2nd arg: the increment
	&push	($want);
	&nop	();
	&mov	($seen,&DWP(0,$ptr));	# value currently in memory
&set_label("spin");
	&lea	($want,&DWP(0,$seen,$delta));	# candidate = seen + increment
	&nop	();
	# lock; cmpxchg %ebx,(%edx) - %eax is involved and is always
	# reloaded, so a failed attempt retries with the fresh value.
	&data_word(0x1ab10ff0);
	&jne	(&label("spin"));
	&mov	($seen,$want);		# OpenSSL expects the new value
	&pop	($want);
	&ret	();
	}
&function_end_B("OPENSSL_atomic_add");

# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
&function_begin_B("OPENSSL_indirect_call");
	{
	# Both variables are lexical to this block.  Note that a bare
	# "my $i,$max=7;" would declare only $i and leave $max a package
	# global, hence the separate declaration.
	my $max=7;		# $max has to be chosen as 4*n-1
				# in order to preserve eventual
				# stack alignment
	&push	("ebp");
	&mov	("ebp","esp");
	&sub	("esp",$max*4);
	# The first two arguments travel through ecx and edx, so they
	# are still in those registers when the callee is entered.
	&mov	("ecx",&DWP(12,"ebp"));
	&mov	(&DWP(0,"esp"),"ecx");
	&mov	("edx",&DWP(16,"ebp"));
	&mov	(&DWP(4,"esp"),"edx");
	foreach my $i (2..$max-1)
		{
		# Some copies will be redundant/bogus...
		&mov	("eax",&DWP(12+$i*4,"ebp"));
		&mov	(&DWP(0+$i*4,"esp"),"eax");
		}
	&call_ptr	(&DWP(8,"ebp"));# make the call...
	&mov	("esp","ebp");	# ... and just restore the stack pointer
				# without paying attention to what we called,
				# (__cdecl *func) or (__stdcall *one).
	&pop	("ebp");
	&ret	();
	}
&function_end_B("OPENSSL_indirect_call");

# Hand OPENSSL_cpuid_setup to initseg() (presumably so that it runs at
# load time - confirm in x86asm.pl), then let asm_finish() complete the
# generated module.
&initseg	("OPENSSL_cpuid_setup");

&asm_finish	();