x86_64cpuid.pl revision 1.10
#!/usr/bin/env perl
#
# Emits x86_64 assembly for four helper routines (OPENSSL_atomic_add,
# OPENSSL_ia32_cpuid, OPENSSL_wipe_cpu, OPENSSL_ia32_rdrand), an .init
# section call to OPENSSL_cpuid_setup and the OPENSSL_ia32cap_P common
# symbol.  Everything printed is piped to x86_64-xlate.pl, which receives
# the flavour and output arguments unchanged.
#
# Usage: x86_64cpuid.pl [flavour] [output]
#   If the first argument contains a dot it is taken to be the output
#   file name and the flavour is left undefined.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Look for the translator next to this script, then in a perlasm/
# subdirectory of the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# The translator path is quoted so that a directory name containing
# spaces survives the shell.  The open is checked so that a failure to
# start the pipe is reported instead of silently producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

($arg1,$arg2,$arg3,$arg4)=("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Preamble: arrange for OPENSSL_cpuid_setup to be called from .init and
# declare the hidden 8-byte OPENSSL_ia32cap_P common symbol.
print<<___;
.extern	OPENSSL_cpuid_setup
.hidden	OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8,4

.text

___

# OPENSSL_atomic_add: loads the 32-bit value at (arg1), adds arg2 to it
# and retries a lock-prefixed cmpxchg until the store succeeds.  Returns
# the new value, sign-extended to 64 bits, in %rax.
print<<___;
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_ia32_cpuid: runs CPUID and returns the leaf 1 feature words in
# %rax, adjusted %ecx in the upper 32 bits and adjusted %edx in the lower
# 32 bits.  %rbx is preserved by parking it in %r8.  Adjustments made:
#   - %edx bits 30 and 20 are first forced to 0; bit 30 is then set on
#     Intel CPUs, and bit 20 as well when the Family ID field is 15;
#   - %edx bit 28 (hyper-threading) is cleared when the core / cache
#     sharing counts queried below indicate it is not meaningful;
#   - %ecx bit 11 is replaced by the AMD XOP flag from leaf 0x80000001;
#   - the AVX, FMA and XOP bits are cleared unless OSXSAVE is set and
#     XCR0 reports both XMM and YMM state as enabled.
print<<___;
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
___

# OPENSSL_wipe_cpu: zeroes %xmm0-%xmm15 and the registers %rcx, %rdx,
# %rsi, %rdi and %r8-%r11, then returns %rsp+8 in %rax.
print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

# OPENSSL_ia32_rdrand: issues rdrand up to 8 times, stopping at the first
# attempt that sets the carry flag.  Returns the value left in %rax; if
# that value is zero, the remaining loop count in %rcx is returned
# instead.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___

# STDOUT is the pipe to the translator, so closing it flushes the output
# and waits for the child.  close returns false if the write failed or
# the translator exited non-zero (in which case $! is 0 and $? holds the
# status); either way the generated file must not be trusted.
close STDOUT
    or die "error closing pipe to $xlate: ", ($! ? $! : "exit status $?");