x86cpuid.pl revision 325335
#!/usr/bin/env perl
#
# perlasm generator for the 32-bit x86 CPU-identification and utility
# routines.  The script loads x86asm.pl, emits every routine through the
# perlasm helpers (&function_begin, &mov, &set_label, ...) and closes the
# output with &asm_finish().
#
# Command line: $ARGV[0] is handed to &asm_init(); any argument matching
# -DOPENSSL_IA32_SSE2 additionally enables the XMM register wipe inside
# OPENSSL_wipe_cpu.

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],"x86cpuid");

for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(ptr)
#
# Probes the processor with CPUID.  The single argument is a pointer; the
# dword at offset 8 from it is cleared on entry and, when leaf 7 is
# available, receives EBX of CPUID leaf 7 (extended feature flags).
# Returns the adjusted leaf-1 EDX in eax and the adjusted leaf-1 ECX in edx.
# When CPUID is not available (EFLAGS bit 21 cannot be toggled) both eax and
# edx are returned as zero.
&function_begin("OPENSSL_ia32_cpuid");
    # Try to flip EFLAGS bit 21; if it sticks, CPUID is available.
    &xor    ("edx","edx");
    &pushf  ();
    &pop    ("eax");
    &mov    ("ecx","eax");
    &xor    ("eax",1<<21);
    &push   ("eax");
    &popf   ();
    &pushf  ();
    &pop    ("eax");
    &xor    ("ecx","eax");              # ecx = EFLAGS bits that really changed
    &xor    ("eax","eax");
    &mov    ("esi",&wparam(0));
    &mov    (&DWP(8,"esi"),"eax");      # clear extended feature flags
    &bt     ("ecx",21);
    &jnc    (&label("nocpuid"));        # leave with eax = edx = 0
    &cpuid  ();                         # eax is 0 here: vendor string query
    &mov    ("edi","eax");              # max value for standard query level

    # Vendor check #1: ebp becomes 0 only for "GenuineIntel".
    &xor    ("eax","eax");
    &cmp    ("ebx",0x756e6547);         # "Genu"
    &setne  (&LB("eax"));
    &mov    ("ebp","eax");
    &cmp    ("edx",0x49656e69);         # "ineI"
    &setne  (&LB("eax"));
    &or     ("ebp","eax");
    &cmp    ("ecx",0x6c65746e);         # "ntel"
    &setne  (&LB("eax"));
    &or     ("ebp","eax");              # 0 indicates Intel CPU
    &jz     (&label("intel"));

    # Vendor check #2: esi becomes 0 only for "AuthenticAMD".
    &cmp    ("ebx",0x68747541);         # "Auth"
    &setne  (&LB("eax"));
    &mov    ("esi","eax");
    &cmp    ("edx",0x69746E65);         # "enti"
    &setne  (&LB("eax"));
    &or     ("esi","eax");
    &cmp    ("ecx",0x444D4163);         # "cAMD"
    &setne  (&LB("eax"));
    &or     ("esi","eax");              # 0 indicates AMD CPU
    &jnz    (&label("intel"));          # neither vendor: take the common path

    # AMD specific
    &mov    ("eax",0x80000000);
    &cpuid  ();
    &cmp    ("eax",0x80000001);
    &jb     (&label("intel"));
    &mov    ("esi","eax");              # keep max extended query level
    &mov    ("eax",0x80000001);
    &cpuid  ();
    &or     ("ebp","ecx");
    &and    ("ebp",1<<11|1);            # isolate XOP bit (bit 0 = "not Intel")
    &cmp    ("esi",0x80000008);
    &jb     (&label("intel"));

    &mov    ("eax",0x80000008);
    &cpuid  ();
    &movz   ("esi",&LB("ecx"));         # number of cores - 1
    &inc    ("esi");                    # number of cores

    &mov    ("eax",1);
    &xor    ("ecx","ecx");
    &cpuid  ();
    &bt     ("edx",28);
    &jnc    (&label("generic"));
    &shr    ("ebx",16);
    &and    ("ebx",0xff);
    &cmp    ("ebx","esi");
    &ja     (&label("generic"));
    &and    ("edx",0xefffffff);         # clear hyper-threading bit
    &jmp    (&label("generic"));

&set_label("intel");
    &cmp    ("edi",4);
    &mov    ("esi",-1);                 # mov leaves the flags from cmp intact
    &jb     (&label("nocacheinfo"));

    &mov    ("eax",4);
    &mov    ("ecx",0);                  # query L1D
    &cpuid  ();
    &mov    ("esi","eax");
    &shr    ("esi",14);
    &and    ("esi",0xfff);              # number of cores -1 per L1D

&set_label("nocacheinfo");
    &mov    ("eax",1);
    &xor    ("ecx","ecx");
    &cpuid  ();
    &and    ("edx",0xbfefffff);         # force reserved bits #20, #30 to 0
    &cmp    ("ebp",0);
    &jne    (&label("notintel"));
    &or     ("edx",1<<30);              # set reserved bit#30 on Intel CPUs
    &and    (&HB("eax"),15);            # family ID
    &cmp    (&HB("eax"),15);            # P4?
    &jne    (&label("notintel"));
    &or     ("edx",1<<20);              # set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
    &bt     ("edx",28);                 # test hyper-threading bit
    &jnc    (&label("generic"));
    &and    ("edx",0xefffffff);
    &cmp    ("esi",0);
    &je     (&label("generic"));

    &or     ("edx",0x10000000);
    &shr    ("ebx",16);
    &cmp    (&LB("ebx"),1);
    &ja     (&label("generic"));
    &and    ("edx",0xefffffff);         # clear hyper-threading bit if not

&set_label("generic");
    &and    ("ebp",1<<11);              # isolate AMD XOP flag
    &and    ("ecx",0xfffff7ff);         # force 11th bit to 0
    &mov    ("esi","edx");              # %ebp:%esi is copy of %ecx:%edx
    &or     ("ebp","ecx");              # merge AMD XOP flag

    &cmp    ("edi",7);
    &mov    ("edi",&wparam(0));         # reload pointer; flags from cmp survive
    &jb     (&label("no_extended_info"));
    &mov    ("eax",7);
    &xor    ("ecx","ecx");
    &cpuid  ();
    &mov    (&DWP(8,"edi"),"ebx");      # save extended feature flag
&set_label("no_extended_info");

    &bt     ("ebp",27);                 # check OSXSAVE bit
    &jnc    (&label("clear_avx"));
    &xor    ("ecx","ecx");
    &data_byte(0x0f,0x01,0xd0);         # xgetbv
    &and    ("eax",6);
    &cmp    ("eax",6);
    &je     (&label("done"));           # both state bits set: keep everything
    &cmp    ("eax",2);
    &je     (&label("clear_avx"));      # only bit 1 set: drop AVX-class flags
&set_label("clear_xmm");
    &and    ("ebp",0xfdfffffd);         # clear AESNI and PCLMULQDQ bits
    &and    ("esi",0xfeffffff);         # clear FXSR
&set_label("clear_avx");
    &and    ("ebp",0xefffe7ff);         # clear AVX, FMA and AMD XOP bits
    &and    (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done");
    &mov    ("eax","esi");
    &mov    ("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the RDTSC result in edx:eax when bit 4 of the first
# OPENSSL_ia32cap_P word is set, otherwise returns 0 in both registers.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"ecx"),4);
    &jnc    (&label("notsc"));
    &rdtsc  ();
&set_label("notsc");
    &ret    ();
&function_end_B("OPENSSL_rdtsc");

# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# Returns in edx:eax the difference between the time-stamp counter read
# after and before a halt instruction; returns 0 when there is no TSC, when
# the low two bits of %cs are non-zero, or when EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &bt     (&DWP(0,"ecx"),4);
    &jnc    (&label("nohalt"));         # no TSC

    &data_word(0x9058900e);             # push %cs; pop %eax
    &and    ("eax",3);
    &jnz    (&label("nohalt"));         # not enough privileges

    &pushf  ();
    &pop    ("eax");
    &bt     ("eax",9);
    &jnc    (&label("nohalt"));         # interrupts are disabled

    &rdtsc  ();
    &push   ("edx");                    # stash the "before" reading on the stack
    &push   ("eax");
    &halt   ();
    &rdtsc  ();

    &sub    ("eax",&DWP(0,"esp"));      # 64-bit subtract: after - before
    &sbb    ("edx",&DWP(4,"esp"));
    &add    ("esp",8);
    &ret    ();

&set_label("nohalt");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &ret    ();
&function_end_B("OPENSSL_instrument_halt");

# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# First stack argument is loaded into %ds, second is the offset that is
# polled.  Returns the spin count in eax, or 0 in eax and edx when EFLAGS
# bit 9 is clear.
&function_begin_B("OPENSSL_far_spin");
    &pushf  ();
    &pop    ("eax");
    &bt     ("eax",9);
    &jnc    (&label("nospin"));         # interrupts are disabled

    &mov    ("eax",&DWP(4,"esp"));
    &mov    ("ecx",&DWP(8,"esp"));
    &data_word (0x90d88e1e);            # push %ds, mov %eax,%ds
    &xor    ("eax","eax");
    &mov    ("edx",&DWP(0,"ecx"));      # reference value to watch for a change
    &jmp    (&label("spin"));

    &align  (16);
&set_label("spin");
    &inc    ("eax");
    &cmp    ("edx",&DWP(0,"ecx"));
    &je     (&label("spin"));

    &data_word (0x1f909090);            # pop %ds
    &ret    ();

&set_label("nospin");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &ret    ();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes eax and edx, zaps the x87/MMX register bank and, when the script
# was run with -DOPENSSL_IA32_SSE2 and the CPU reports both SSE2 and FXSR,
# xmm0-xmm7 as well.  Returns in eax the address 4 bytes above the stack
# pointer at entry (i.e. just past the return address).
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
    &xor    ("eax","eax");
    &xor    ("edx","edx");
    &picmeup("ecx","OPENSSL_ia32cap_P");
    &mov    ("ecx",&DWP(0,"ecx"));      # ecx = capability word (no longer a pointer)
    # The capability word is already in ecx, so the bit has to be tested in
    # the register; testing &DWP(0,"ecx") here would use the feature bitmask
    # as a memory address.  Bit 0 is the FPU flag of CPUID.1:EDX, which is
    # what the "no_x87" branch target calls for.
    &bt     ("ecx",0);
    &jnc    (&label("no_x87"));
    if ($sse2) {
        &and    ("ecx",1<<26|1<<24);    # check SSE2 and FXSR bits
        &cmp    ("ecx",1<<26|1<<24);
        &jne    (&label("no_sse2"));
        &pxor   ("xmm0","xmm0");
        &pxor   ("xmm1","xmm1");
        &pxor   ("xmm2","xmm2");
        &pxor   ("xmm3","xmm3");
        &pxor   ("xmm4","xmm4");
        &pxor   ("xmm5","xmm5");
        &pxor   ("xmm6","xmm6");
        &pxor   ("xmm7","xmm7");
    &set_label("no_sse2");
    }
    # just a bunch of fldz to zap the fp/mm bank followed by finit...
    &data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
    &lea    ("eax",&DWP(4,"esp"));
    &ret    ();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, amount)
#
# Adds the second argument to the dword the first argument points at, using
# a lock cmpxchg retry loop, and returns the new value in eax.
&function_begin_B("OPENSSL_atomic_add");
    &mov    ("edx",&DWP(4,"esp"));      # fetch the pointer, 1st arg
    &mov    ("ecx",&DWP(8,"esp"));      # fetch the increment, 2nd arg
    &push   ("ebx");
    &nop    ();
    &mov    ("eax",&DWP(0,"edx"));
&set_label("spin");
    &lea    ("ebx",&DWP(0,"eax","ecx"));    # candidate new value = old + increment
    &nop    ();
    &data_word(0x1ab10ff0);             # lock; cmpxchg %ebx,(%edx)
                                        # %eax is involved and is always reloaded
    &jne    (&label("spin"));
    &mov    ("eax","ebx");              # OpenSSL expects the new value
    &pop    ("ebx");
    &ret    ();
&function_end_B("OPENSSL_atomic_add");

# This function can become handy under Win32 in situations when
# we don't know which calling convention, __stdcall or __cdecl(*),
# indirect callee is using. In C it can be deployed as
#
#ifdef OPENSSL_CPUID_OBJ
#	type OPENSSL_indirect_call(void *f,...);
#	...
#	OPENSSL_indirect_call(func,[up to $max arguments]);
#endif
#
# (*)	it's designed to work even for __fastcall if number of
#	arguments is 1 or 2!
&function_begin_B("OPENSSL_indirect_call");
    {
    my ($max,$i)=(7,);                  # $max has to be chosen as 4*n-1
                                        # in order to preserve eventual
                                        # stack alignment
    &push   ("ebp");
    &mov    ("ebp","esp");
    &sub    ("esp",$max*4);
    # The first two forwarded arguments are copied through ecx and edx and
    # are still in those registers at the call.
    &mov    ("ecx",&DWP(12,"ebp"));
    &mov    (&DWP(0,"esp"),"ecx");
    &mov    ("edx",&DWP(16,"ebp"));
    &mov    (&DWP(4,"esp"),"edx");
    for($i=2;$i<$max;$i++)
        {
        # Some copies will be redundant/bogus...
        &mov    ("eax",&DWP(12+$i*4,"ebp"));
        &mov    (&DWP(0+$i*4,"esp"),"eax");
        }
    &call_ptr   (&DWP(8,"ebp"));        # make the call...
    &mov    ("esp","ebp");              # ... and just restore the stack pointer
                                        # without paying attention to what we called,
                                        # (__cdecl *func) or (__stdcall *one).
    &pop    ("ebp");
    &ret    ();
    }
&function_end_B("OPENSSL_indirect_call");

# OPENSSL_cleanse(ptr, len)
#
# Writes zero bytes over len bytes starting at ptr.  Lengths below 7 are
# handled byte by byte; longer buffers are first advanced byte-wise until the
# pointer is 4-byte aligned, then cleared a dword at a time, and any tail is
# finished by the byte loop.
&function_begin_B("OPENSSL_cleanse");
    &mov    ("edx",&wparam(0));
    &mov    ("ecx",&wparam(1));
    &xor    ("eax","eax");
    &cmp    ("ecx",7);
    &jae    (&label("lot"));
    &cmp    ("ecx",0);
    &je     (&label("ret"));
&set_label("little");
    &mov    (&BP(0,"edx"),"al");
    &sub    ("ecx",1);
    &lea    ("edx",&DWP(1,"edx"));      # lea keeps the flags set by sub
    &jnz    (&label("little"));
&set_label("ret");
    &ret    ();

&set_label("lot",16);
    &test   ("edx",3);
    &jz     (&label("aligned"));
    &mov    (&BP(0,"edx"),"al");
    &lea    ("ecx",&DWP(-1,"ecx"));
    &lea    ("edx",&DWP(1,"edx"));
    &jmp    (&label("lot"));
&set_label("aligned");
    &mov    (&DWP(0,"edx"),"eax");
    &lea    ("ecx",&DWP(-4,"ecx"));
    &test   ("ecx",-4);                 # at least 4 bytes left?
    &lea    ("edx",&DWP(4,"edx"));      # lea keeps the flags set by test
    &jnz    (&label("aligned"));
    &cmp    ("ecx",0);
    &jne    (&label("little"));
    &ret    ();
&function_end_B("OPENSSL_cleanse");

# OPENSSL_ia32_rdrand()
#
# Issues rdrand up to 8 times until the carry flag reports success and
# returns the value in eax.  A zero value in eax is replaced by the loop
# counter in ecx, which is itself zero once all attempts are used up.
&function_begin_B("OPENSSL_ia32_rdrand");
    &mov    ("ecx",8);
&set_label("loop");
    &rdrand ("eax");
    &jc     (&label("break"));
    &loop   (&label("loop"));
&set_label("break");
    &cmp    ("eax",0);
    &cmove  ("eax","ecx");
    &ret    ();
&function_end_B("OPENSSL_ia32_rdrand");

# OPENSSL_ia32_rdseed()
#
# Same retry scheme as OPENSSL_ia32_rdrand, using the rdseed instruction.
&function_begin_B("OPENSSL_ia32_rdseed");
    &mov    ("ecx",8);
&set_label("loop");
    &rdseed ("eax");
    &jc     (&label("break"));
    &loop   (&label("loop"));
&set_label("break");
    &cmp    ("eax",0);
    &cmove  ("eax","ecx");
    &ret    ();
&function_end_B("OPENSSL_ia32_rdseed");

# NOTE(review): presumably registers OPENSSL_cpuid_setup as an initializer
# in the generated object -- confirm against x86asm.pl.
&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();