#!/usr/bin/env perl

# Generator for the x86 CPU-identification helpers.  The assembler
# flavour/output target is taken from $ARGV[0] by asm_init(); all the
# instruction emitters used below come from perlasm/x86asm.pl.

push @INC, "perlasm";
require "x86asm.pl";

&asm_init($ARGV[0],"x86cpuid");

# SSE2 register wiping is only generated when the build passes
# -DOPENSSL_IA32_SSE2 on the command line.
$sse2 = 1 if grep { /-DOPENSSL_IA32_SSE2/ } @ARGV;

# OPENSSL_ia32_cpuid: returns the CPUID(1) feature words, edx in eax and
# ecx in edx, after a few adjustments (see below).  Returns 0 in eax
# when the CPUID instruction is not available.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	# Try to flip bit 21 of EFLAGS; if the flipped value does not
	# stick, CPUID is not supported and we bail out with edx == 0.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&bt	("ecx",21);
	&jnc	(&label("done"));
	# CPUID(0): compare the vendor string against "GenuineIntel".
	# ebp accumulates the setne results, i.e. it ends up non-zero
	# when any of the three words mismatches.
	&xor	("eax","eax");
	&cpuid	();
	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&data_byte(0x0f,0x95,0xc0);	#&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&data_byte(0x0f,0x95,0xc0);	#&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&data_byte(0x0f,0x95,0xc0);	#&setne	(&LB("eax"));
	&or	("ebp","eax");
	# CPUID(1): feature flags in edx/ecx, family in eax[11:8].
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&cmp	("ebp",0);
	&jne	(&label("notP4"));	# not an Intel part
	&and	("eax",15<<8);		# family ID
	&cmp	("eax",15<<8);		# P4?
	&jne	(&label("notP4"));
	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR
&set_label("notP4");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx",1);		# see if cache is shared(*)
	&ja	(&label("done"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if not
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
# (*)	on Core2 this value is set to 2 denoting the fact that L2
#	cache is shared between cores.

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc: returns the time-stamp counter in edx:eax, or 0 when
# bit 4 of OPENSSL_ia32cap_P is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
# OPENSSL_instrument_halt: returns in edx:eax the difference between
# the time-stamp counter read after and before a halt instruction.
# Returns 0 when bit 4 of OPENSSL_ia32cap_P is clear (no TSC), when the
# low two bits of %cs are non-zero (not enough privileges), or when
# EFLAGS bit 9 is clear (interrupts are disabled).
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	# The terminating ';' matters: without it Perl parses this line
	# and the next as one expression, "&pop(...) & bt(...)".
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# save the starting TSC on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.

# OPENSSL_far_spin(selector, offset): loads the 1st argument into %ds,
# then counts in eax how many times the dword at the 2nd argument's
# address compares equal to its initial value.  Returns 0 when EFLAGS
# bit 9 is clear (interrupts are disabled).
&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	# The terminating ';' matters: without it Perl parses this line
	# and the next as one expression, "&pop(...) & bt(...)".
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));	# initial value to compare against
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu: zeroes eax/edx, the x87/MMX register bank and, when
# generated with SSE2 support, xmm0-xmm7.  Returns esp+4 in eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# ecx now holds the capability word
	# ecx is a value at this point, not an address, so the bits are
	# tested in the register itself; the memory form would
	# dereference the capability bitmask as a pointer.
	# NOTE(review): bit index 1 is kept from the original; confirm it
	# is the intended x87 capability bit.
	&bt	("ecx",1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&bt	("ecx",26);
		&jnc	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount): atomically adds the 2nd argument to
# the dword the 1st argument points at and returns the new value.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# candidate new value
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
# OPENSSL_indirect_call(f, ...): copies $max dwords of the caller's
# arguments that follow f onto a fresh stack area, calls f, and then
# restores esp from ebp regardless of whether the callee popped its
# own arguments.
&function_begin_B("OPENSSL_indirect_call");
	{
	# Both variables are proper lexicals here; "my $i,$max=7;" would
	# declare only $i and assign a package global $max.
	my $max=7;		# $max has to be chosen as 4*n-1
				# in order to preserve eventual
				# stack alignment
	&push	("ebp");
	&mov	("ebp","esp");
	&sub	("esp",$max*4);
	# The first two arguments are also left in ecx and edx.
	&mov	("ecx",&DWP(12,"ebp"));
	&mov	(&DWP(0,"esp"),"ecx");
	&mov	("edx",&DWP(16,"ebp"));
	&mov	(&DWP(4,"esp"),"edx");
	for my $i (2 .. $max-1)
		{
		# Some copies will be redundant/bogus...
		&mov	("eax",&DWP(12+$i*4,"ebp"));
		&mov	(&DWP(0+$i*4,"esp"),"eax");
		}
	&call_ptr	(&DWP(8,"ebp"));# make the call...
	&mov	("esp","ebp");	# ... and just restore the stack pointer
				# without paying attention to what we called,
				# (__cdecl *func) or (__stdcall *one).
	&pop	("ebp");
	&ret	();
	}
&function_end_B("OPENSSL_indirect_call");

&initseg("OPENSSL_cpuid_setup");

&asm_finish();