#!/usr/bin/env perl

# Generator for the x86 CPU-identification helpers.  Each instruction below
# is a call into x86asm.pl.  The leading '&' on every call is deliberate:
# many mnemonics (xor, and, or, cmp, push, pop, ...) collide with Perl
# built-ins and would not parse as plain function calls.

# Make the perlasm directory next to this script (or ./perlasm) loadable.
($dir) = $0 =~ m{(.*[/\\])[^/\\]+$};
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],"x86cpuid");

# SSE2-specific code is generated only when the build passes this define.
$sse2 = 1 if grep { /-DOPENSSL_IA32_SSE2/ } @ARGV;

# Bit 28 of the feature word (edx after cpuid leaf 1): hyper-threading.
my $htt_bit  = 28;
my $htt_mask = 1<<$htt_bit;
my $htt_clr  = 0xffffffff ^ $htt_mask;

# Emits one cmp/setne/{mov|or} triple per (register, magic) pair.  The
# accumulator register ends up zero only if every register equalled its
# magic value, and the final "or" leaves ZF describing that result.
# Relies on the upper 24 bits of eax already being zero.
my $emit_vendor_test = sub {
	my ($acc, @pairs) = @_;
	my $seen = 0;
	while (my ($reg, $magic) = splice(@pairs, 0, 2)) {
		&cmp	($reg,$magic);
		&setne	(&LB("eax"));
		if ($seen++)	{ &or	($acc,"eax"); }
		else		{ &mov	($acc,"eax"); }
	}
};

# OPENSSL_ia32_cpuid: returns the (adjusted) cpuid leaf-1 edx in eax and
# the leaf-1 ecx in edx.  Returns zero in eax when cpuid is unavailable.
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");

	# Flip bit 21 in the flags image, write it back and read it again.
	# If the bit did not change, cpuid cannot be used.
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");
	&bt	("ecx",21);
	&jnc	(&label("done"));

	&xor	("eax","eax");
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	&xor	("eax","eax");
	$emit_vendor_test->("ebp",
		"ebx",0x756e6547,	# "Genu"
		"edx",0x49656e69,	# "ineI"
		"ecx",0x6c65746e);	# "ntel"
					# ebp == 0 indicates Intel CPU
	&jz	(&label("intel"));

	$emit_vendor_test->("esi",
		"ebx",0x68747541,	# "Auth"
		"edx",0x69746E65,	# "enti"
		"ecx",0x444D4163);	# "cAMD"
					# esi == 0 indicates AMD CPU
	&jnz	(&label("intel"));

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",$htt_bit);
	&jnc	(&label("done"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("done"));
	&and	("edx",$htt_clr);	# clear hyper-threading bit
	&jmp	(&label("done"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("edi",-1);		# mov leaves the flags from cmp intact
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("edi","eax");
	&shr	("edi",14);
	&and	("edi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&cmp	("ebp",0);		# only Intel parts get the P4 check
	&jne	(&label("notP4"));
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notP4"));
	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR
&set_label("notP4");
	&bt	("edx",$htt_bit);	# test hyper-threading bit
	&jnc	(&label("done"));
	&and	("edx",$htt_clr);
	&cmp	("edi",0);
	&je	(&label("done"));

	&or	("edx",$htt_mask);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("done"));
	&and	("edx",$htt_clr);	# clear hyper-threading bit if not
&set_label("done");
	&mov	("eax","edx");
	&mov	("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc: returns the rdtsc result in edx:eax, or zero in both
# registers when bit 4 of OPENSSL_ia32cap_P is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
# OPENSSL_instrument_halt: brackets a hlt instruction with two rdtsc reads
# and returns the 64-bit difference in edx:eax.  Returns zero in edx:eax
# when bit 4 of OPENSSL_ia32cap_P is clear (no TSC), when the low two bits
# of %cs are non-zero, or when flags bit 9 is clear (interrupts disabled).
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# keep the starting timestamp on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# edx:eax = end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# Arguments (stack): selector to load into %ds, offset to poll.
# Returns the spin count in eax, or zero when flags bit 9 is clear.
&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");
	&mov	("edx",&DWP(0,"ecx"));	# snapshot the polled dword
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));	# loop until the dword changes
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop %ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu: zeroes eax/edx, the SSE2 registers (when built with
# -DOPENSSL_IA32_SSE2 and capability bit 26 is set) and the x87/MMX bank,
# then returns the address just above the return address in eax.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	# ecx must stay the *address* of the capability word: every test
	# below reads it through DWP(0,"ecx"), exactly as OPENSSL_rdtsc does.
	&picmeup("ecx","OPENSSL_ia32cap_P");
	# NOTE(review): if the capability word mirrors CPUID.1:EDX, the x87
	# flag would be bit 0 rather than bit 1 -- confirm the intended bit.
	&bt	(&DWP(0,"ecx"),1);
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&bt	(&DWP(0,"ecx"),26);
		&jnc	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add: atomically adds the 2nd argument to the dword the
# 1st argument points at, retrying the lock cmpxchg until it succeeds, and
# returns the new value in eax.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
&function_begin_B("OPENSSL_indirect_call");
	{
	my $max = 7;		# $max has to be chosen as 4*n-1
				# in order to preserve eventual
				# stack alignment
	&push	("ebp");
	&mov	("ebp","esp");
	&sub	("esp",$max*4);
	# The first two arguments are also left in ecx/edx, which is what
	# makes the 1- and 2-argument __fastcall case work.
	&mov	("ecx",&DWP(12,"ebp"));
	&mov	(&DWP(0,"esp"),"ecx");
	&mov	("edx",&DWP(16,"ebp"));
	&mov	(&DWP(4,"esp"),"edx");
	for my $i (2 .. $max-1)
		{
		# Some copies will be redundant/bogus...
		&mov	("eax",&DWP(12+$i*4,"ebp"));
		&mov	(&DWP(0+$i*4,"esp"),"eax");
		}
	&call_ptr	(&DWP(8,"ebp"));# make the call...
	&mov	("esp","ebp");	# ... and just restore the stack pointer
				# without paying attention to what we called,
				# (__cdecl *func) or (__stdcall *one).
	&pop	("ebp");
	&ret	();
	}
&function_end_B("OPENSSL_indirect_call");

# OPENSSL_cleanse: stores zero bytes over the buffer given by the 1st
# argument (pointer) and 2nd argument (length).  Lengths below 7 are
# cleared byte by byte; longer buffers are first byte-stepped to a 4-byte
# boundary, then cleared a dword at a time, with any tail handled by the
# byte loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags from sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes still to go?
	&lea	("edx",&DWP(4,"edx"));
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));
	&ret	();
&function_end_B("OPENSSL_cleanse");

&initseg("OPENSSL_cpuid_setup");

&asm_finish();